diff --git a/.github/wordlist.txt b/.github/wordlist.txt
index 95a4c7849..e12ab62b1 100644
--- a/.github/wordlist.txt
+++ b/.github/wordlist.txt
@@ -167,4 +167,5 @@ Shubham
 Sood
 Toolchains
 jetic
-Öztürk
\ No newline at end of file
+Öztürk
+reconfigures
\ No newline at end of file
diff --git a/.github/workflows/assets/grpcurl.yaml b/.github/workflows/assets/grpcurl.yaml
new file mode 100644
index 000000000..b0b52100d
--- /dev/null
+++ b/.github/workflows/assets/grpcurl.yaml
@@ -0,0 +1,152 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: sleep
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: sleep
+  template:
+    metadata:
+      labels:
+        app: sleep
+    spec:
+      containers:
+      - name: sleep
+        image: fullstorydev/grpcurl:latest-alpine
+        command: ["/bin/sh", "-c", "source command.sh"]
+        workingDir: /demo
+        imagePullPolicy: IfNotPresent
+        volumeMounts:
+        - name: config-volume
+          mountPath: /demo
+      volumes:
+      - name: config-volume
+        configMap:
+          name: demo-input
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: demo-input
+data:
+  route_guide.proto: |
+    // Copyright 2015 gRPC authors.
+    //
+    // Licensed under the Apache License, Version 2.0 (the "License");
+    // you may not use this file except in compliance with the License.
+    // You may obtain a copy of the License at
+    //
+    //     http://www.apache.org/licenses/LICENSE-2.0
+    //
+    // Unless required by applicable law or agreed to in writing, software
+    // distributed under the License is distributed on an "AS IS" BASIS,
+    // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    // See the License for the specific language governing permissions and
+    // limitations under the License.
+
+    syntax = "proto3";
+
+    option go_package = "google.golang.org/grpc/examples/route_guide/routeguide";
+    option java_multiple_files = true;
+    option java_package = "io.grpc.examples.routeguide";
+    option java_outer_classname = "RouteGuideProto";
+
+    package routeguide;
+
+    // Interface exported by the server.
+    service RouteGuide {
+      // A simple RPC.
+      //
+      // Obtains the feature at a given position.
+      //
+      // A feature with an empty name is returned if there's no feature at the given
+      // position.
+      rpc GetFeature(Point) returns (Feature) {}
+
+      // A server-to-client streaming RPC.
+      //
+      // Obtains the Features available within the given Rectangle. Results are
+      // streamed rather than returned at once (e.g. in a response message with a
+      // repeated field), as the rectangle may cover a large area and contain a
+      // huge number of features.
+      rpc ListFeatures(Rectangle) returns (stream Feature) {}
+
+      // A client-to-server streaming RPC.
+      //
+      // Accepts a stream of Points on a route being traversed, returning a
+      // RouteSummary when traversal is completed.
+      rpc RecordRoute(stream Point) returns (RouteSummary) {}
+
+      // A Bidirectional streaming RPC.
+      //
+      // Accepts a stream of RouteNotes sent while a route is being traversed,
+      // while receiving other RouteNotes (e.g. from other users).
+      rpc RouteChat(stream RouteNote) returns (stream RouteNote) {}
+    }
+
+    // Points are represented as latitude-longitude pairs in the E7 representation
+    // (degrees multiplied by 10**7 and rounded to the nearest integer).
+    // Latitudes should be in the range +/- 90 degrees and longitude should be in
+    // the range +/- 180 degrees (inclusive).
+    message Point {
+      int32 latitude = 1;
+      int32 longitude = 2;
+    }
+
+    // A latitude-longitude rectangle, represented as two diagonally opposite
+    // points "lo" and "hi".
+    message Rectangle {
+      // One corner of the rectangle.
+      Point lo = 1;
+
+      // The other corner of the rectangle.
+      Point hi = 2;
+    }
+
+    // A feature names something at a given point.
+    //
+    // If a feature could not be named, the name is empty.
+    message Feature {
+      // The name of the feature.
+      string name = 1;
+
+      // The point where the feature is detected.
+      Point location = 2;
+    }
+
+    // A RouteNote is a message sent while at a given point.
+    message RouteNote {
+      // The location from which the message is sent.
+      Point location = 1;
+
+      // The message to be sent.
+      string message = 2;
+    }
+
+    // A RouteSummary is received in response to a RecordRoute rpc.
+    //
+    // It contains the number of individual points received, the number of
+    // detected features, and the total distance covered as the cumulative sum of
+    // the distance between each point.
+    message RouteSummary {
+      // The number of points received.
+      int32 point_count = 1;
+
+      // The number of known features passed while traversing the route.
+      int32 feature_count = 2;
+
+      // The distance covered in metres.
+      int32 distance = 3;
+
+      // The duration of the traversal in seconds.
+      int32 elapsed_time = 4;
+    }
+  unary.json: |
+    {
+      "latitude": 407838351,
+      "longitude": -746143763
+    }
+  command.sh: |
+    cat unary.json | grpcurl -plaintext -proto route_guide.proto -d @ routeguide:50051 routeguide.RouteGuide.GetFeature
\ No newline at end of file
diff --git a/.github/workflows/draftrelease.yaml b/.github/workflows/draftrelease.yaml
index d05c20277..9db7a6cee 100644
--- a/.github/workflows/draftrelease.yaml
+++ b/.github/workflows/draftrelease.yaml
@@ -1,8 +1,9 @@
 name: Release drafter
 
+# Runs when changes are pushed to master
+
 on:
   push:
-    # branches to consider in the event; optional, defaults to all
     branches:
       - master
diff --git a/.github/workflows/golangci-lint.yml b/.github/workflows/golangci-lint.yml
index 7509f8e0c..1ff27440d 100644
--- a/.github/workflows/golangci-lint.yml
+++ b/.github/workflows/golangci-lint.yml
@@ -1,7 +1,15 @@
 name: golangci-lint
 
+# Only runs when there are golang code changes
+
+# Lint golang files
+
 on:
   pull_request:
+    branches:
+      - master
+    paths:
+      - '**.go'
 
 permissions:
   contents: read
diff --git a/.github/workflows/linkcheck.yaml b/.github/workflows/linkcheck.yaml
index 0e8bc8e25..5e7a21f95 100644
--- a/.github/workflows/linkcheck.yaml
+++ b/.github/workflows/linkcheck.yaml
@@ -1,9 +1,15 @@
 name: Link checker
 
+# Only runs when there are markdown changes, and on a monthly schedule
+
+# Check links across markdown files
+
 on:
   pull_request:
     branches:
       - master
+    paths:
+      - '**.md'
   schedule:
     - cron: "0 0 1 * *"
diff --git a/.github/workflows/lintcharts.yaml b/.github/workflows/lintcharts.yaml
index 6d209c86a..a121cb095 100644
--- a/.github/workflows/lintcharts.yaml
+++ b/.github/workflows/lintcharts.yaml
@@ -1,9 +1,15 @@
 name: Lint Helm charts
 
+# Only runs when charts have changed
+
+# Lint Helm charts
+
 on:
   pull_request:
     branches:
       - master
+    paths:
+      - charts/**
 
 jobs:
   # Get the paths for the Helm charts to lint
diff --git a/.github/workflows/lintcharts2.yaml b/.github/workflows/lintcharts2.yaml
index 3580379ad..18b9df2a1 100644
--- a/.github/workflows/lintcharts2.yaml
+++ b/.github/workflows/lintcharts2.yaml
@@ -4,54 +4,23 @@ name: Additional Helm chart linting
 # This workflow builds on the other workflow by producing Kubernetes YAML files from the templates and running kube-linter on those files
 # See iter8-tools/iter8#1452
 
+# Only runs when charts have changed
+
+# Lint Helm charts
+# Use templates to create Kubernetes YAML files and lint them
+
 on:
   pull_request:
     branches:
       - master
+    paths:
+      - charts/**
 
 jobs:
   http-experiment:
     name: Lint HTTP experiment
     runs-on: ubuntu-latest
 
     steps:
-      - name: Check out code
-        uses: actions/checkout@v3
-
-      - name: Get modified files in the charts/iter8 folder
-        id: modified-files
-        uses: tj-actions/changed-files@v35
-        with:
-          files: charts/iter8
-
-      - uses: azure/setup-helm@v3
-        if: steps.modified-files.outputs.any_modified == 'true'
-        with:
-          token: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Create Kubernetes YAML file
-        if: steps.modified-files.outputs.any_modified == 'true'
-        run: |
-          helm template charts/iter8 \
-            --set "tasks={ready,http,assess}" \
-            --set ready.deploy=httpbin \
-            --set ready.service=httpbin \
-            --set ready.timeout=60s \
-            --set http.url=http://httpbin.default/get \
-            --set assess.SLOs.upper.http/latency-mean=50 \
-            --set assess.SLOs.upper.http/error-count=0 \
-            --set runner=job >> iter8.yaml
-
-      - name: Lint Kubernetes YAML file
-        if: steps.modified-files.outputs.any_modified == 'true'
-        uses: stackrox/kube-linter-action@v1
-        with:
-          directory: iter8.yaml
-
-  http-looped-experiment:
-    name: Lint HTTP looped experiment
-    runs-on: ubuntu-latest
-    steps:
       - name: Check out code
         uses: actions/checkout@v3
 
@@ -72,9 +41,7 @@
         run: |
           helm template charts/iter8 \
             --set tasks={http} \
-            --set http.url="http://httpbin.default/get" \
-            --set runner=cronjob \
-            --set cronjobSchedule="*/1 * * * *" >> iter8.yaml
+            --set http.url=http://httpbin.default/get >> iter8.yaml
 
       - name: Lint Kubernetes YAML file
         if: steps.modified-files.outputs.any_modified == 'true'
         uses: stackrox/kube-linter-action@v1
         with:
           directory: iter8.yaml
@@ -108,47 +75,10 @@
             --set tasks={grpc} \
             --set grpc.host="hello.default:50051" \
             --set grpc.call="helloworld.Greeter.SayHello" \
-            --set grpc.protoURL="https://raw.githubusercontent.com/grpc/grpc-go/master/examples/helloworld/helloworld/helloworld.proto" \
-            --set runner=job >> iter8.yaml
+            --set grpc.protoURL="https://raw.githubusercontent.com/grpc/grpc-go/master/examples/helloworld/helloworld/helloworld.proto" >> iter8.yaml
 
       - name: Lint Kubernetes YAML file
         if: steps.modified-files.outputs.any_modified == 'true'
         uses: stackrox/kube-linter-action@v1
         with:
           directory: iter8.yaml
-
-  grpc-looped-experiment:
-    name: Lint gRPC looped experiment
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Check out code
-        uses: actions/checkout@v3
-
-      - name: Get modified files in the charts/iter8 folder
-        id: modified-files
-        uses: tj-actions/changed-files@v35
-        with:
-          files: charts/iter8
-
-      - uses: azure/setup-helm@v3
-        if: steps.modified-files.outputs.any_modified == 'true'
-        with:
-          token: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Create Kubernetes YAML file
-        if: steps.modified-files.outputs.any_modified == 'true'
-        run: |
-          helm template charts/iter8 \
-            --set tasks={grpc} \
-            --set grpc.host="hello.default:50051" \
-            --set grpc.call="helloworld.Greeter.SayHello" \
-            --set grpc.protoURL="https://raw.githubusercontent.com/grpc/grpc-go/master/examples/helloworld/helloworld/helloworld.proto" \
-            --set runner=cronjob \
-            --set cronjobSchedule="*/1 * * * *" >> iter8.yaml
-
-      - name: Lint Kubernetes YAML file
-        if: steps.modified-files.outputs.any_modified == 'true'
-        uses: stackrox/kube-linter-action@v1
-        with:
-          directory: iter8.yaml
\ No newline at end of file
diff --git a/.github/workflows/assets.yaml b/.github/workflows/releaseassets.yaml
similarity index 75%
rename from .github/workflows/assets.yaml
rename to .github/workflows/releaseassets.yaml
index 34c6bfa68..530f115ac 100644
--- a/.github/workflows/assets.yaml
+++ b/.github/workflows/releaseassets.yaml
@@ -1,5 +1,10 @@
 name: Publish binaries and Docker image
 
+# Runs when a release is published
+
+# Build and publish binaries and release Docker image
+# Test Docker image
+
 on:
   release:
     types: [published]
@@ -97,23 +102,28 @@ jobs:
         uses: helm/kind-action@v1.5.0
         with:
           wait: 300s
-      - name: Create app
+      - name: Create httpbin application
         run: |
           kubectl create deployment httpbin --image=kennethreitz/httpbin
           kubectl expose deployment httpbin --type=ClusterIP --port=80
           kubectl wait --for=condition=available --timeout=60s deploy/httpbin
+      - name: Install controller
+        run: |
+          helm install --repo https://iter8-tools.github.io/iter8 iter8 controller --set logLevel=trace
+          kubectl rollout status --watch --timeout=60s statefulset/iter8
       - name: iter8 k launch
         run: |
           iter8 k launch \
             --set tasks={http} \
-            --set http.url="http://httpbin.default/get" \
-            --set runner=job
-      - name: try other iter8 k commands
+            --set http.url="http://httpbin.default/get"
+          sleep 60
+      - name: Try other iter8 k commands
         run: |
-          iter8 k assert -c completed -c nofailure --timeout 60s
-          iter8 k report
           iter8 k log
           iter8 k delete
+      - name: Check GET /httpDashboard
+        run: |
+          curl "http://localhost:8080/httpDashboard?namespace=default&experiment=default" -f
 
   kubernetes-grpc-experiment:
     name: Kubernetes gRPC load test
@@ -126,20 +136,22 @@ jobs:
         uses: helm/kind-action@v1.5.0
         with:
           wait: 300s
-      - name: Create app
+      - name: Create routeguide application
        run: |
           kubectl create deployment routeguide --image=golang --port=50051 \
           -- bash -c "git clone -b v1.52.0 --depth 1 https://github.com/grpc/grpc-go; cd grpc-go/examples/route_guide; sed -i "''" "'"s/localhost//"'" server/server.go; go run server/server.go"
           kubectl expose deployment routeguide --port=50051
           kubectl wait --for=condition=available --timeout=60s deployment/routeguide
-
+      - name: Install controller
+        run: |
+          helm install --repo https://iter8-tools.github.io/iter8 iter8 controller --set logLevel=trace
+          kubectl rollout status --watch --timeout=60s statefulset/iter8
       - name: Test gRPC service with grpcurl
         run: |
           curl -sO https://gist.githubusercontent.com/kalantar/510737f0fd58c0432a08e5b6e45ec97f/raw/524d6660284bf653ce0f29f3a25ed0e913c3df80/grpcurl-routeguide.yaml
           kubectl apply -f grpcurl-routeguide.yaml
           sleep 180
           kubectl logs deploy/sleep
-
       - name: iter8 k launch
         run: |
           iter8 k launch \
@@ -148,13 +160,14 @@ jobs:
             --set grpc.protoURL=https://raw.githubusercontent.com/grpc/grpc-go/v1.52.0/examples/route_guide/routeguide/route_guide.proto \
             --set grpc.call=routeguide.RouteGuide.GetFeature \
-            --set grpc.dataURL=https://raw.githubusercontent.com/iter8-tools/docs/v0.13.13/samples/grpc-payload/unary.json \
-            --set runner=job
-      - name: try other iter8 k commands
+            --set grpc.dataURL=https://raw.githubusercontent.com/iter8-tools/docs/v0.13.13/samples/grpc-payload/unary.json
+          sleep 60
+      - name: Try other iter8 k commands
         run: |
-          iter8 k assert -c completed -c nofailure --timeout 60s
-          iter8 k report
           iter8 k log
           iter8 k delete
+      - name: Check GET /grpcDashboard
+        run: |
+          curl "http://localhost:8080/grpcDashboard?namespace=default&experiment=default" -f
 
   kubernetes-grpc-experiment2:
     name: Kubernetes gRPC load test 2
@@ -167,25 +180,30 @@ jobs:
         uses: helm/kind-action@v1.5.0
         with:
           wait: 300s
-      - name: Create app
+      - name: Create hello application
         run: |
           kubectl create deploy hello --image=docker.io/grpc/java-example-hostname:latest --port=50051
           kubectl expose deploy hello --port=50051
           kubectl wait --for=condition=available --timeout=60s deploy/hello
+      - name: Install controller
+        run: |
+          helm install --repo https://iter8-tools.github.io/iter8 iter8 controller --set logLevel=trace
+          kubectl rollout status --watch --timeout=60s statefulset/iter8
       - name: iter8 k launch
         run: |
           iter8 k launch \
             --set tasks={grpc} \
             --set grpc.host="hello.default:50051" \
             --set grpc.call="helloworld.Greeter.SayHello" \
-            --set grpc.protoURL="https://raw.githubusercontent.com/grpc/grpc-go/master/examples/helloworld/helloworld/helloworld.proto" \
-            --set runner=job
-      - name: try other iter8 k commands
+            --set grpc.protoURL="https://raw.githubusercontent.com/grpc/grpc-go/master/examples/helloworld/helloworld/helloworld.proto"
+          sleep 60
+      - name: Try other iter8 k commands
         run: |
-          iter8 k assert -c completed -c nofailure --timeout 60s
-          iter8 k report
           iter8 k log
           iter8 k delete
+      - name: Check GET /grpcDashboard
+        run: |
+          curl "http://localhost:8080/grpcDashboard?namespace=default&experiment=default" -f
 
   readiness:
     name: Kubernetes readiness test
@@ -198,23 +216,26 @@ jobs:
         uses: helm/kind-action@v1.5.0
         with:
           wait: 300s
-      - name: Deploy resources to cluster
+      - name: Create httpbin application
         run: |
           kubectl create deploy httpbin --image=kennethreitz/httpbin --port=80
           kubectl expose deploy httpbin --port=80
+      - name: Install controller
+        run: |
+          helm install --repo https://iter8-tools.github.io/iter8 iter8 controller --set logLevel=trace
+          kubectl rollout status --watch --timeout=60s statefulset/iter8
       - name: k launch with readiness checks
         run: |
           iter8 k launch \
-            --set "tasks={ready,http,assess}" \
+            --set "tasks={ready,http}" \
             --set ready.deploy="httpbin" \
             --set ready.service="httpbin" \
             --set ready.timeout=60s \
-            --set http.url=http://httpbin.default \
-            --set assess.SLOs.upper.http/latency-mean=50 \
-            --set runner=job
-      - name: k assert experiment completed without failures
+            --set http.url=http://httpbin.default/get
+          sleep 60
+      - name: Check GET /httpDashboard
         run: |
-          iter8 k assert -c completed -c nofailure --timeout 60s
+          curl "http://localhost:8080/httpDashboard?namespace=default&experiment=default" -f
 
   readiness-with-namespace:
     name: Kubernetes readiness test with namespace
@@ -227,22 +248,25 @@ jobs:
         uses: helm/kind-action@v1.5.0
         with:
           wait: 300s
-      - name: Deploy resources to cluster
+      - name: Create httpbin application
         run: |
           kubectl create deploy httpbin --image=kennethreitz/httpbin --port=80
           kubectl expose deploy httpbin --port=80
           kubectl create namespace experiments
+      - name: Install controller
+        run: |
+          helm install --repo https://iter8-tools.github.io/iter8 iter8 controller --set logLevel=trace
+          kubectl rollout status --watch --timeout=60s statefulset/iter8
       - name: k launch with readiness checks
         run: |
           iter8 k launch -n experiments \
-            --set "tasks={ready,http,assess}" \
+            --set "tasks={ready,http}" \
             --set ready.deploy="httpbin" \
             --set ready.service="httpbin" \
             --set ready.timeout=60s \
             --set ready.namespace=default \
-            --set http.url=http://httpbin.default/get \
-            --set assess.SLOs.upper.http/latency-mean=50 \
-            --set runner=job
-      - name: k assert experiment completed without failures
+            --set http.url=http://httpbin.default/get
+          sleep 60
+      - name: Check GET /httpDashboard
         run: |
-          iter8 k assert -n experiments -c completed -c nofailure --timeout 60s
+          curl "http://localhost:8080/httpDashboard?namespace=default&experiment=default" -f
\ No newline at end of file
diff --git a/.github/workflows/releasecharts.yaml b/.github/workflows/releasecharts.yaml
index 84d271dd3..ea7a8a581 100644
--- a/.github/workflows/releasecharts.yaml
+++ b/.github/workflows/releasecharts.yaml
@@ -1,12 +1,18 @@
-name: Release charts
+name: Publish charts
+
+# Only runs when charts are pushed
+
+# Release charts and test them
 
 on:
   push:
     branches:
       - master
+    paths:
+      - charts/**
 
 jobs:
-  release:
+  release-charts:
     permissions:
       contents: write
     runs-on: ubuntu-latest
@@ -32,3 +38,363 @@ jobs:
         config: config.yaml
       env:
         CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
+
+  http-experiment:
+    name: HTTP load test
+    needs: release-charts
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v3
+
+      - name: Get modified files in the charts/iter8 folder
+        id: modified-files
+        uses: tj-actions/changed-files@v35
+        with:
+          files: charts/iter8
+
+      - name: Install Iter8
+        run: GOBIN=/usr/local/bin go install github.com/iter8-tools/iter8@v0.15
+
+      - name: Start kind cluster
+        uses: helm/kind-action@v1.5.0
+        with:
+          wait: 300s
+
+      - name: Create httpbin application
+        run: |
+          kubectl create deployment httpbin --image=kennethreitz/httpbin
+          kubectl expose deployment httpbin --type=ClusterIP --port=80
+          kubectl wait --for=condition=available --timeout=60s deploy/httpbin
+
+      - name: Install controller
+        run: |
+          helm install --repo https://iter8-tools.github.io/iter8 iter8 controller --set logLevel=trace
+          kubectl rollout status --watch --timeout=60s statefulset/iter8
+
+      - name: iter8 k launch
+        run: |
+          iter8 k launch \
+            --set "tasks={http}" \
+            --set http.url="http://httpbin.default/get"
+          sleep 60
+
+      - name: Try other iter8 k commands
+        run: |
+          iter8 k log
+          iter8 k delete
+
+      - name: Expose metrics service
+        run: |
+          kubectl port-forward service/iter8 8080:8080 &
+
+      - name: Check GET /httpDashboard
+        run: |
+          curl "http://localhost:8080/httpDashboard?namespace=default&experiment=default" -f
+
+  http-payload-experiment:
+    name: HTTP load test with payload
+    needs: release-charts
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v3
+
+      - name: Get modified files in the charts/iter8 folder
+        id: modified-files
+        uses: tj-actions/changed-files@v35
+        with:
+          files: charts/iter8
+
+      - name: Install Iter8
+        run: GOBIN=/usr/local/bin go install github.com/iter8-tools/iter8@v0.14
+
+      - name: Start kind cluster
+        uses: helm/kind-action@v1.5.0
+        with:
+          wait: 300s
+
+      - name: Create httpbin application
+        run: |
+          kubectl create deployment httpbin --image=kennethreitz/httpbin
+          kubectl expose deployment httpbin --type=ClusterIP --port=80
+          kubectl wait --for=condition=available --timeout=60s deploy/httpbin
+
+      - name: Install controller
+        run: |
+          helm install --repo https://iter8-tools.github.io/iter8 iter8 controller --set logLevel=trace
+          kubectl rollout status --watch --timeout=60s statefulset/iter8
+
+      - name: iter8 k launch
+        run: |
+          iter8 k launch \
+            --set "tasks={http}" \
+            --set http.url="http://httpbin.default/post" \
+            --set http.payloadStr=hello
+          sleep 60
+
+      - name: Try other iter8 k commands
+        run: |
+          iter8 k log
+          iter8 k delete
+
+      - name: Expose metrics service
+        run: |
+          kubectl port-forward service/iter8 8080:8080 &
+
+      - name: Check GET /httpDashboard
+        run: |
+          curl "http://localhost:8080/httpDashboard?namespace=default&experiment=default" -f
+
+  http-multiple-experiment:
+    name: HTTP load test with multiple endpoints
+    needs: release-charts
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v3
+
+      - name: Get modified files in the charts/iter8 folder
+        id: modified-files
+        uses: tj-actions/changed-files@v35
+        with:
+          files: charts/iter8
+
+      - name: Install Iter8
+        run: GOBIN=/usr/local/bin go install github.com/iter8-tools/iter8@v0.14
+
+      - name: Start kind cluster
+        uses: helm/kind-action@v1.5.0
+        with:
+          wait: 300s
+
+      - name: Create httpbin application
+        run: |
+          kubectl create deployment httpbin --image=kennethreitz/httpbin
+          kubectl expose deployment httpbin --type=ClusterIP --port=80
+          kubectl wait --for=condition=available --timeout=60s deploy/httpbin
+
+      - name: Install controller
+        run: |
+          helm install --repo https://iter8-tools.github.io/iter8 iter8 controller --set logLevel=trace
+          kubectl rollout status --watch --timeout=60s statefulset/iter8
+
+      - name: iter8 k launch
+        run: |
+          iter8 k launch \
+            --set "tasks={http}" \
+            --set http.endpoints.get.url=http://httpbin.default/get \
+            --set http.endpoints.getAnything.url=http://httpbin.default/anything \
+            --set http.endpoints.post.url=http://httpbin.default/post \
+            --set http.endpoints.post.payloadStr=hello
+          sleep 60
+
+      - name: Try other iter8 k commands
+        run: |
+          iter8 k log
+          iter8 k delete
+
+      - name: Expose metrics service
+        run: |
+          kubectl port-forward service/iter8 8080:8080 &
+
+      - name: Check GET /httpDashboard
+        run: |
+          curl "http://localhost:8080/httpDashboard?namespace=default&experiment=default" -f
+
+  grpc-experiment:
+    name: gRPC load test
+    needs: release-charts
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v3
+
+      - name: Get modified files in the charts/iter8 folder
+        id: modified-files
+        uses: tj-actions/changed-files@v35
+        with:
+          files: charts/iter8
+
+      - name: Install Iter8
+        run: GOBIN=/usr/local/bin go install github.com/iter8-tools/iter8@v0.15
+
+      - name: Start kind cluster
+        uses: helm/kind-action@v1.5.0
+        with:
+          wait: 300s
+
+      - name: Create routeguide application
+        run: |
+          kubectl create deployment routeguide --image=golang --port=50051 \
+          -- bash -c "git clone -b v1.52.0 --depth 1 https://github.com/grpc/grpc-go; cd grpc-go/examples/route_guide; sed -i "''" "'"s/localhost//"'" server/server.go; go run server/server.go"
+          kubectl expose deployment routeguide --port=50051
+          kubectl wait --for=condition=available --timeout=60s deployment/routeguide
+
+      - name: Test gRPC service with grpcurl
+        run: |
+          curl -sO https://gist.githubusercontent.com/kalantar/510737f0fd58c0432a08e5b6e45ec97f/raw/524d6660284bf653ce0f29f3a25ed0e913c3df80/grpcurl-routeguide.yaml
+          kubectl apply -f grpcurl-routeguide.yaml
+          sleep 180
+          kubectl logs deploy/sleep
+
+      - name: Install controller
+        run: |
+          helm install --repo https://iter8-tools.github.io/iter8 iter8 controller --set logLevel=trace
+          kubectl rollout status --watch --timeout=60s statefulset/iter8
+
+      - name: iter8 k launch
+        run: |
+          iter8 k launch \
+            --set "tasks={ready,grpc}" \
+            --set ready.deploy=routeguide \
+            --set ready.service=routeguide \
+            --set ready.timeout=60s \
+            --set grpc.host=routeguide.default:50051 \
+            --set grpc.protoURL=https://raw.githubusercontent.com/grpc/grpc-go/v1.52.0/examples/route_guide/routeguide/route_guide.proto \
+            --set grpc.call=routeguide.RouteGuide.GetFeature \
+            --set grpc.dataURL=https://raw.githubusercontent.com/iter8-tools/docs/v0.13.13/samples/grpc-payload/unary.json
+          sleep 60
+
+      - name: Try other iter8 k commands
+        run: |
+          iter8 k log
+          iter8 k delete
+
+      - name: Expose metrics service
+        run: |
+          kubectl port-forward service/iter8 8080:8080 &
+
+      - name: Check GET /grpcDashboard
+        run: |
+          curl "http://localhost:8080/grpcDashboard?namespace=default&experiment=default" -f
+
+  grpc-multiple-experiment:
+    name: gRPC load test with multiple endpoints
+    needs: release-charts
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v3
+
+      - name: Get modified files in the charts/iter8 folder
+        id: modified-files
+        uses: tj-actions/changed-files@v35
+        with:
+          files: charts/iter8
+
+      - name: Install Iter8
+        run: GOBIN=/usr/local/bin go install github.com/iter8-tools/iter8@v0.14
+
+      - name: Start kind cluster
+        uses: helm/kind-action@v1.5.0
+        with:
+          wait: 300s
+
+      - name: Create routeguide application
+        run: |
+          kubectl create deployment routeguide --image=golang --port=50051 \
+          -- bash -c "git clone -b v1.52.0 --depth 1 https://github.com/grpc/grpc-go; cd grpc-go/examples/route_guide; sed -i "''" "'"s/localhost//"'" server/server.go; go run server/server.go"
+          kubectl expose deployment routeguide --port=50051
+          kubectl wait --for=condition=available --timeout=60s deployment/routeguide
+
+      - name: Test gRPC service with grpcurl
+        run: |
+          curl -sO https://gist.githubusercontent.com/kalantar/510737f0fd58c0432a08e5b6e45ec97f/raw/524d6660284bf653ce0f29f3a25ed0e913c3df80/grpcurl-routeguide.yaml
+          kubectl apply -f grpcurl-routeguide.yaml
+          sleep 180
+          kubectl logs deploy/sleep
+
+      - name: Install controller
+        run: |
+          helm install --repo https://iter8-tools.github.io/iter8 iter8 controller --set logLevel=trace
+          kubectl rollout status --watch --timeout=60s statefulset/iter8
+
+      - name: iter8 k launch
+        run: |
+          iter8 k launch \
+            --set "tasks={ready,grpc}" \
+            --set ready.deploy=routeguide \
+            --set ready.service=routeguide \
+            --set ready.timeout=60s \
+            --set grpc.host=routeguide.default:50051 \
+            --set grpc.protoURL=https://raw.githubusercontent.com/grpc/grpc-go/v1.52.0/examples/route_guide/routeguide/route_guide.proto \
+            --set grpc.endpoints.getFeature.call=routeguide.RouteGuide.GetFeature \
+            --set grpc.endpoints.getFeature.dataURL=https://raw.githubusercontent.com/iter8-tools/docs/v0.13.13/samples/grpc-payload/unary.json \
+            --set grpc.endpoints.listFeatures.call=routeguide.RouteGuide.ListFeatures \
+            --set grpc.endpoints.listFeatures.dataURL=https://raw.githubusercontent.com/iter8-tools/docs/v0.13.13/samples/grpc-payload/server.json
+          sleep 60
+
+      - name: Try other iter8 k commands
+        run: |
+          iter8 k log
+          iter8 k delete
+
+      - name: Expose metrics service
+        run: |
+          kubectl port-forward service/iter8 8080:8080 &
+
+      - name: Check GET /grpcDashboard
+        run: |
+          curl "http://localhost:8080/grpcDashboard?namespace=default&experiment=default" -f
+
+  grpc-experiment2:
+    name: gRPC load test 2
+    needs: release-charts
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v3
+
+      - name: Get modified files in the charts/iter8 folder
+        id: modified-files
+        uses: tj-actions/changed-files@v35
+        with:
+          files: charts/iter8
+
+      - name: Install Iter8
+        run: GOBIN=/usr/local/bin go install github.com/iter8-tools/iter8@v0.14
+
+      - name: Start kind cluster
+        uses: helm/kind-action@v1.5.0
+        with:
+          wait: 300s
+
+      - name: Create hello application
+        run: |
+          kubectl create deploy hello --image=docker.io/grpc/java-example-hostname:latest --port=50051
+          kubectl expose deploy hello --port=50051
+          kubectl wait --for=condition=available --timeout=60s deploy/hello
+
+      - name: Install controller
+        run: |
+          helm install --repo https://iter8-tools.github.io/iter8 iter8 controller --set logLevel=trace
+          kubectl rollout status --watch --timeout=60s statefulset/iter8
+
+      - name: iter8 k launch
+        run: |
+          iter8 k launch \
+            --set "tasks={grpc}" \
+            --set grpc.host="hello.default:50051" \
+            --set grpc.call="helloworld.Greeter.SayHello" \
+            --set grpc.protoURL="https://raw.githubusercontent.com/grpc/grpc-go/master/examples/helloworld/helloworld/helloworld.proto"
+          sleep 60
+
+      - name: Try other iter8 k commands
+        run: |
+          iter8 k log
+          iter8 k delete
+
+      - name: Expose metrics service
+        run: |
+          kubectl port-forward service/iter8 8080:8080 &
+
+      - name: Check GET /grpcDashboard
+        run: |
+          curl "http://localhost:8080/grpcDashboard?namespace=default&experiment=default" -f
\ No newline at end of file
diff --git a/.github/workflows/spellcheck.yaml b/.github/workflows/spellcheck.yaml
index 2f66c906e..716556c1c 100644
--- a/.github/workflows/spellcheck.yaml
+++ b/.github/workflows/spellcheck.yaml
@@ -1,5 +1,9 @@
 name: Spell check markdown
 
+# Runs on pull requests
+
+# Spell check markdown
+
 on:
   pull_request:
     branches:
diff --git a/.github/workflows/testcharts.yaml b/.github/workflows/testcharts.yaml
index 1d5c19dff..aafd78012 100644
--- a/.github/workflows/testcharts.yaml
+++ b/.github/workflows/testcharts.yaml
@@ -1,22 +1,27 @@
-name: Tests to ensure that changes to charts do not break user experience
+name: Check changes to charts do not break user experience
+
+# Only runs when charts have changed
+
+# Test changes to charts against released images
+# TODO: test if changes to kustomize have been replicated for charts
+# NOTE: charts/controller is being tested in all of the http and grpc tests
 
 on:
   pull_request:
     branches:
-    - master
+      - master
+    paths:
+      - charts/**
 
 # Kind versions used to test Iter8 on different versions of Kubernetes
 # From: https://github.com/kubernetes-sigs/kind/releases
 env:
   versions: |
-    kindest/node:v1.26.3@sha256:61b92f38dff6ccc29969e7aa154d34e38b89443af1a2c14e6cfbd2df6419c66f
-    kindest/node:v1.25.8@sha256:00d3f5314cc35327706776e95b2f8e504198ce59ac545d0200a89e69fce10b7f
-    kindest/node:v1.24.12@sha256:1e12918b8bc3d4253bc08f640a231bb0d3b2c5a9b28aa3f2ca1aee93e1e8db16
-    kindest/node:v1.23.17@sha256:e5fd1d9cd7a9a50939f9c005684df5a6d145e8d695e78463637b79464292e66c
-    kindest/node:v1.22.17@sha256:c8a828709a53c25cbdc0790c8afe12f25538617c7be879083248981945c38693
-    kindest/node:v1.21.14@sha256:27ef72ea623ee879a25fe6f9982690a3e370c68286f4356bf643467c552a3888
-    kindest/node:v1.27.1@sha256:9915f5629ef4d29f35b478e819249e89cfaffcbfeebda4324e5c01d53d937b09
-    kindest/node:v1.27.0@sha256:c6b22e613523b1af67d4bc8a0c38a4c3ea3a2b8fbc5b367ae36345c9cb844518
+    kindest/node:v1.28.0@sha256:b7a4cad12c197af3ba43202d3efe03246b3f0793f162afb40a33c923952d5b31
+    kindest/node:v1.27.3@sha256:3966ac761ae0136263ffdb6cfd4db23ef8a83cba8a463690e98317add2c9ba72
+    kindest/node:v1.26.6@sha256:6e2d8b28a5b601defe327b98bd1c2d1930b49e5d8c512e1895099e4504007adb
+    kindest/node:v1.25.11@sha256:227fa11ce74ea76a0474eeefb84cb75d8dad1b08638371ecf0e86259b35be0c8
+    kindest/node:v1.24.15@sha256:7db4f8bea3e14b82d12e044e25e34bd53754b7f2b0e9d56df21774e6f66a70ab
 
 jobs:
   # Get the different Kind versions
@@ -37,6 +42,8 @@ jobs:
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
 
+  # TODO: add check to verify when a change is made to kustomize, a similar change is made to charts
+
   http-experiment:
     name: HTTP load test
     needs: get_versions
@@ -66,34 +73,42 @@ jobs:
         wait: 300s
         node_image: ${{ matrix.version }}
 
-      - name: Create app
+      - name: Create httpbin application
         if: steps.modified-files.outputs.any_modified == 'true'
         run: |
           kubectl create deployment httpbin --image=kennethreitz/httpbin
           kubectl expose deployment httpbin --type=ClusterIP --port=80
           kubectl wait --for=condition=available --timeout=60s deploy/httpbin
 
+      - name: Install controller
+        run: |
+          helm install iter8 charts/controller --set logLevel=trace
+          kubectl rollout status --watch --timeout=60s statefulset/iter8
+
       - name: iter8 k launch
         if: steps.modified-files.outputs.any_modified == 'true'
         run: |
           iter8 k launch \
             --localChart \
             --chartName charts/iter8 \
-            --set "tasks={http,assess}" \
-            --set http.url="http://httpbin.default/get" \
-            --set assess.SLOs.upper.http/latency-p50=5 \
-            --set assess.SLOs.upper.http/latency-p90=10 \
-            --set assess.SLOs.upper.http/error-count=0 \
-            --set runner=job
+            --set "tasks={http}" \
+            --set http.url="http://httpbin.default/get"
+          sleep 60
 
       - name: Try other iter8 k commands
         if: steps.modified-files.outputs.any_modified == 'true'
         run: |
-          iter8 k assert -c completed -c nofailure -c slos --timeout 300s
-          iter8 k report
           iter8 k log
           iter8 k delete
 
+      - name: Expose metrics service
+        run: |
+          kubectl port-forward service/iter8 8080:8080 &
+
+      - name: Check GET /httpDashboard
+        run: |
+          curl "http://localhost:8080/httpDashboard?namespace=default&experiment=default" -f
+
   http-payload-experiment:
     name: HTTP load test with payload
     needs: get_versions
@@ -123,35 +138,43 @@ jobs:
         wait: 300s
         node_image: ${{ matrix.version }}
 
-      - name: Create app
+      - name: Create httpbin application
         if: steps.modified-files.outputs.any_modified == 'true'
         run: |
           kubectl create deployment httpbin --image=kennethreitz/httpbin
           kubectl expose deployment httpbin --type=ClusterIP --port=80
           kubectl wait --for=condition=available --timeout=60s deploy/httpbin
 
+      - name: Install controller
+        run: |
+          helm install iter8 charts/controller --set logLevel=trace
+          kubectl rollout status --watch --timeout=60s statefulset/iter8
+
      - name: iter8 k launch
         if: steps.modified-files.outputs.any_modified == 'true'
         run: |
           iter8 k launch \
             --localChart \
             --chartName charts/iter8 \
-            --set "tasks={http,assess}" \
+            --set "tasks={http}" \
             --set http.url="http://httpbin.default/post" \
-            --set http.payloadStr=hello \
-            --set assess.SLOs.upper.http/latency-p50=5 \
-            --set assess.SLOs.upper.http/latency-p90=10 \
-            --set assess.SLOs.upper.http/error-count=0 \
-            --set runner=job
+            --set http.payloadStr=hello
+          sleep 60
 
       - name: Try other iter8 k commands
         if: steps.modified-files.outputs.any_modified == 'true'
         run: |
-          iter8 k assert -c completed -c nofailure -c slos --timeout 300s
-          iter8 k report
           iter8 k log
           iter8 k delete
 
+      - name: Expose metrics service
+        run: |
+          kubectl port-forward service/iter8 8080:8080 &
+
+      - name: Check GET /httpDashboard
+        run: |
+          curl "http://localhost:8080/httpDashboard?namespace=default&experiment=default" -f
+
   http-multiple-experiment:
     name: HTTP load test with multiple endpoints
     needs: get_versions
@@ -181,97 +204,44 @@ jobs:
         wait: 300s
         node_image: ${{ matrix.version }}
 
-      - name: Create app
+      - name: Create httpbin application
         if: steps.modified-files.outputs.any_modified == 'true'
         run: |
           kubectl create deployment httpbin --image=kennethreitz/httpbin
           kubectl expose deployment httpbin --type=ClusterIP --port=80
           kubectl wait --for=condition=available --timeout=60s deploy/httpbin
 
+      - name: Install controller
+        run: |
+          helm install iter8 charts/controller --set logLevel=trace
+          kubectl rollout status --watch --timeout=60s statefulset/iter8
+
       - name: iter8 k launch
         if: steps.modified-files.outputs.any_modified == 'true'
         run: |
           iter8 k launch \
             --localChart \
             --chartName charts/iter8 \
-            --set "tasks={http,assess}" \
+            --set "tasks={http}" \
             --set http.endpoints.get.url=http://httpbin.default/get \
             --set http.endpoints.getAnything.url=http://httpbin.default/anything \
             --set http.endpoints.post.url=http://httpbin.default/post \
-            --set http.endpoints.post.payloadStr=hello \
-            --set assess.SLOs.upper.http-get/error-count=0 \
-            --set assess.SLOs.upper.http-get/latency-mean=50 \
-            --set assess.SLOs.upper.http-getAnything/error-count=0 \
-            --set assess.SLOs.upper.http-getAnything/latency-mean=100 \
-            --set assess.SLOs.upper.http-post/error-count=0 \
-            --set assess.SLOs.upper.http-post/latency-mean=150 \
-            --set runner=job
+            --set http.endpoints.post.payloadStr=hello
+          sleep 60
 
       - name: Try other iter8 k commands
         if: steps.modified-files.outputs.any_modified == 'true'
         run: |
-          iter8 k assert -c completed -c nofailure -c slos --timeout 300s
-          iter8 k report
           iter8 k log
           iter8 k delete
-
-  http-looped-experiment:
-    name: HTTP looped load test
-    needs: get_versions
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        version: ${{ fromJson(needs.get_versions.outputs.matrix) }}
-
-    steps:
-      - name: Check out code
-        uses: actions/checkout@v3
-
-      - name: Get modified files in the charts/iter8 folder
-        id: modified-files
-        uses: tj-actions/changed-files@v35
-        with:
-          files: charts/iter8
-
-      - name: Install Iter8
-        run: GOBIN=/usr/local/bin go install github.com/iter8-tools/iter8@v0.15
-        if: steps.modified-files.outputs.any_modified == 'true'
-
-      - name: Start kind cluster ${{ matrix.version }}
-        uses: helm/kind-action@v1.5.0
-        if: steps.modified-files.outputs.any_modified == 'true'
-        with:
-          wait: 300s
-          node_image: ${{ matrix.version }}
 
-      - name: Create app
-        if: steps.modified-files.outputs.any_modified == 'true'
+      - name: Expose metrics service
         run: |
-          kubectl create deployment httpbin --image=kennethreitz/httpbin
-          kubectl expose deployment httpbin --type=ClusterIP --port=80
-          kubectl wait --for=condition=available --timeout=60s deploy/httpbin
+          kubectl port-forward service/iter8 8080:8080 &
 
-      - name: iter8 k launch
-        if: steps.modified-files.outputs.any_modified == 'true'
+      - name: Check GET /httpDashboard
         run: |
-          iter8 k launch \
-            --localChart \
-            --chartName charts/iter8 \
-            --set "tasks={http,assess}" \
-            --set http.url="http://httpbin.default/get" \
-            --set assess.SLOs.upper.http/latency-p50=5 \
-            --set assess.SLOs.upper.http/latency-p90=10 \
-            --set assess.SLOs.upper.http/error-count=0 \
-            --set runner=cronjob \
-            --set cronjobSchedule="*/1 * * * *"
-
-      - name: Try other iter8 k commands
-        if: steps.modified-files.outputs.any_modified == 'true'
-        run: |
-          iter8 k assert -c nofailure --timeout 300s
-          iter8 k report
-          iter8 k log
-          iter8 k delete
+          curl "http://localhost:8080/httpDashboard?namespace=default&experiment=default" -f
 
   grpc-experiment:
     name: gRPC load test
@@ -302,7 +272,7 @@ jobs:
         wait: 300s
         node_image: ${{ matrix.version }}
 
-      - name: Deploy gRPC service in the Kubernetes cluster
+      - name: Create routeguide application
         if: steps.modified-files.outputs.any_modified == 'true'
         run: |
           kubectl create deployment routeguide --image=golang --port=50051 \
@@ -317,14 +287,19 @@ jobs:
           kubectl apply -f grpcurl-routeguide.yaml
           sleep 180
           kubectl logs deploy/sleep
-
+
+      - name: Install controller
+        run: |
+          helm install iter8 charts/controller --set logLevel=trace
+          kubectl rollout status --watch --timeout=60s statefulset/iter8
+
       - name: iter8 k launch
         if: steps.modified-files.outputs.any_modified == 'true'
         run: |
           iter8 k launch \
             --localChart \
             --chartName charts/iter8 \
-            --set "tasks={ready,grpc,assess}" \
+            --set "tasks={ready,grpc}" \
             --set ready.deploy=routeguide \
             --set ready.service=routeguide \
             --set ready.timeout=60s \
@@ -332,20 +307,22 @@ jobs:
             --set grpc.protoURL=https://raw.githubusercontent.com/grpc/grpc-go/v1.52.0/examples/route_guide/routeguide/route_guide.proto \
             --set grpc.call=routeguide.RouteGuide.GetFeature \
-            --set grpc.dataURL=https://raw.githubusercontent.com/iter8-tools/docs/v0.13.13/samples/grpc-payload/unary.json \
-            --set assess.SLOs.upper.grpc/error-rate=0 \
-            --set assess.SLOs.upper.grpc/latency/mean=200 \
-            --set assess.SLOs.upper.grpc/latency/p90=500 \
-            --set assess.SLOs.upper.grpc/latency/p'97\.5'=600 \
-            --set runner=job
+            --set grpc.dataURL=https://raw.githubusercontent.com/iter8-tools/docs/v0.13.13/samples/grpc-payload/unary.json
+          sleep 60
 
       - name: Try other iter8 k commands
         if: steps.modified-files.outputs.any_modified == 'true'
         run: |
-          iter8 k assert -c completed -c nofailure -c slos --timeout 300s
-          iter8 k report
           iter8 k log
           iter8 k delete
 
+      - name: Expose metrics service
+        run: |
+          kubectl port-forward service/iter8 8080:8080 &
+
+      - name: Check GET /grpcDashboard
+        run: |
+          curl "http://localhost:8080/grpcDashboard?namespace=default&experiment=default" -f
+
   grpc-multiple-experiment:
     name: gRPC load test with multiple endpoints
     needs: get_versions
@@ -375,7 +352,7 @@ jobs:
         wait: 300s
         node_image: ${{ matrix.version }}
 
-      - name: Deploy gRPC service in the Kubernetes cluster
+      - name: Create routeguide application
         if: steps.modified-files.outputs.any_modified == 'true'
         run: |
           kubectl create deployment routeguide --image=golang --port=50051 \
@@ -390,14 +367,19 @@ jobs:
           kubectl apply -f grpcurl-routeguide.yaml
           sleep 180
           kubectl logs deploy/sleep
-
+
+      - name: Install controller
+        run: |
+          helm install iter8 charts/controller --set logLevel=trace
+          kubectl rollout status --watch --timeout=60s statefulset/iter8
+
       - name: iter8 k launch
         if: steps.modified-files.outputs.any_modified == 'true'
         run: |
           iter8 k launch \
             --localChart \
             --chartName charts/iter8 \
-            --set "tasks={ready,grpc,assess}" \
+            --set "tasks={ready,grpc}" \
             --set ready.deploy=routeguide \
             --set ready.service=routeguide \
             --set ready.timeout=60s \
@@ -407,20 +389,22 @@ jobs:
             --set grpc.endpoints.getFeature.dataURL=https://raw.githubusercontent.com/iter8-tools/docs/v0.13.13/samples/grpc-payload/unary.json \
             --set grpc.endpoints.listFeatures.call=routeguide.RouteGuide.ListFeatures \
-            --set grpc.endpoints.listFeatures.dataURL=https://raw.githubusercontent.com/iter8-tools/docs/v0.13.13/samples/grpc-payload/server.json \
-            --set assess.SLOs.upper.grpc-getFeature/error-rate=0 \
-            --set assess.SLOs.upper.grpc-getFeature/latency/mean=50 \
-            --set assess.SLOs.upper.grpc-listFeatures/error-rate=0 \
-            --set assess.SLOs.upper.grpc-listFeatures/latency/mean=100 \
-            --set runner=job
+            --set grpc.endpoints.listFeatures.dataURL=https://raw.githubusercontent.com/iter8-tools/docs/v0.13.13/samples/grpc-payload/server.json
+          sleep 60
 
       - name: Try other iter8 k commands
         if: steps.modified-files.outputs.any_modified == 'true'
         run: |
-          iter8 k assert -c completed -c nofailure -c slos --timeout 300s
-          iter8 k report
           iter8 k log
           iter8 k delete
 
+      - name: Expose metrics service
+        run: |
+          kubectl port-forward service/iter8 8080:8080 &
+
+      - name: Check GET /grpcDashboard
+        run: |
+          curl "http://localhost:8080/grpcDashboard?namespace=default&experiment=default" -f
+
   grpc-experiment2:
     name: gRPC load test 2
     needs: get_versions
@@ -450,177 +434,40 @@ jobs:
         wait: 300s
         node_image: ${{ matrix.version }}
 
-      - name: Create app
+      - name: Create hello application
         if: steps.modified-files.outputs.any_modified == 'true'
         run: |
           kubectl create deploy hello --image=docker.io/grpc/java-example-hostname:latest --port=50051
           kubectl expose deploy hello --port=50051
           kubectl wait --for=condition=available --timeout=60s deploy/hello
-
-      - name: iter8 k launch
-        if: steps.modified-files.outputs.any_modified == 'true'
-        run: |
-          iter8 k launch \
-            --localChart \
-            --chartName charts/iter8 \
-            --set "tasks={grpc,assess}" \
-            --set grpc.host="hello.default:50051" \
-            --set grpc.call="helloworld.Greeter.SayHello" \
-            --set grpc.protoURL="https://raw.githubusercontent.com/grpc/grpc-go/master/examples/helloworld/helloworld/helloworld.proto" \
-            --set assess.SLOs.upper.grpc/error-rate=0 \
-            --set assess.SLOs.upper.grpc/latency/mean=200 \
-            --set assess.SLOs.upper.grpc/latency/p90=500 \
-            --set assess.SLOs.upper.grpc/latency/p'97\.5'=600 \
-            --set runner=job
-
-      - name: Try other iter8 k commands
-        if: steps.modified-files.outputs.any_modified == 'true'
+      - name: Install controller
         run: |
-          iter8 k assert -c completed -c nofailure -c slos --timeout 300s
-          iter8 k report
-          iter8 k log
-          iter8 k delete
-
-  grpc-looped-experiment:
-    name: gRPC looped load test
-    needs: get_versions
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        version: ${{ fromJson(needs.get_versions.outputs.matrix) }}
-
-    steps:
-      - name: Check out code
-        uses: actions/checkout@v3
-
-      - name: Get modified files in the charts/iter8 folder
-        id: modified-files
-        uses: tj-actions/changed-files@v35
-        with:
-          files: charts/iter8
-
-      - name: Install Iter8
-        run: GOBIN=/usr/local/bin go install github.com/iter8-tools/iter8@v0.15
-        if: steps.modified-files.outputs.any_modified == 'true'
 
-      - name: Start kind cluster ${{ matrix.version }}
-        uses: helm/kind-action@v1.5.0
-        if: steps.modified-files.outputs.any_modified == 'true'
-        with:
-          wait: 300s
-          node_image: ${{ matrix.version }}
-
-      - name: Create app
-        if: steps.modified-files.outputs.any_modified == 'true'
-        run: |
-          kubectl create deploy hello --image=docker.io/grpc/java-example-hostname:latest --port=50051
-          kubectl expose deploy hello --port=50051
-          kubectl wait --for=condition=available --timeout=60s deploy/hello
-
+          helm install iter8 charts/controller --set logLevel=trace
+          kubectl rollout status --watch --timeout=60s statefulset/iter8
+
       - name: iter8 k launch
         if: steps.modified-files.outputs.any_modified == 'true'
         run: |
           iter8 k launch \
             --localChart \
             --chartName charts/iter8 \
-            --set "tasks={grpc,assess}" \
+            --set "tasks={grpc}" \
             --set grpc.host="hello.default:50051" \
             --set grpc.call="helloworld.Greeter.SayHello" \
-            --set grpc.protoURL="https://raw.githubusercontent.com/grpc/grpc-go/master/examples/helloworld/helloworld/helloworld.proto" \
-            --set assess.SLOs.upper.grpc/error-rate=0 \
-            --set assess.SLOs.upper.grpc/latency/mean=200 \
-            --set assess.SLOs.upper.grpc/latency/p90=500 \
-            --set assess.SLOs.upper.grpc/latency/p'97\.5'=600 \
-            --set runner=cronjob \
-            --set cronjobSchedule="*/1 * * * *"
+            --set grpc.protoURL="https://raw.githubusercontent.com/grpc/grpc-go/master/examples/helloworld/helloworld/helloworld.proto"
+          sleep 60
 
       - name: Try other iter8 k commands
         if: steps.modified-files.outputs.any_modified == 'true'
         run: |
-          iter8 k assert -c nofailure -c slos --timeout 300s
-          iter8 k report
           iter8 k log
           iter8 k delete
 
-  autox:
-    name: AutoX test
-    needs: get_versions
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        version: ${{ fromJson(needs.get_versions.outputs.matrix) }}
-
-    steps:
-      - name: Check out code
-        uses: actions/checkout@v3
-
-      - name: Get modified files in the charts/autox folder
-        id: modified-files
-        uses: tj-actions/changed-files@v35
-        with:
-          files: charts/autox
-
-      - name: Start kind cluster ${{ matrix.version }}
-        uses: helm/kind-action@v1.5.0
-        if: steps.modified-files.outputs.any_modified == 'true'
-        with:
-          wait: 300s
-          node_image: ${{ matrix.version }}
-
-      - name: Create namespace
-        if: steps.modified-files.outputs.any_modified == 'true'
-        run: |
-          kubectl create namespace argocd
-
-      - name: Start AutoX controller
-        if: steps.modified-files.outputs.any_modified == 'true'
-        run: |
-          helm install autox charts/autox \
-            --set 'groups.httpbin.trigger.name=httpbin' \
-            --set 'groups.httpbin.trigger.namespace=default' \
-            --set 'groups.httpbin.trigger.group=apps' \
-            --set 'groups.httpbin.trigger.version=v1' \
-            --set 'groups.httpbin.trigger.resource=deployments' \
-            --set 'groups.httpbin.specs.iter8.name=iter8' \
-            --set 'groups.httpbin.specs.iter8.values.http.url=http://httpbin.default/get' \
-            --set 'groups.httpbin.specs.iter8.version=0.15.0' \
-            --set 'groups.httpbin.specs.iter8.values.runner=job'
-
-      - name: Check AutoX controller
-        if: steps.modified-files.outputs.any_modified == 'true'
-        run: kubectl wait --for=condition=available --timeout=60s deploy/autox -n argocd
-
-  traffic:
-    name: Traffic test
-    needs: get_versions
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        version: ${{ fromJson(needs.get_versions.outputs.matrix) }}
-
-    steps:
-      - name: Check out code
-        uses: actions/checkout@v3
-
-      - name: Get modified files in the charts/traffic folder
-        id: modified-files
-        uses: tj-actions/changed-files@v35
-        with:
-          files: charts/traffic
-
-      - name: Start kind cluster ${{ matrix.version }}
-        uses: helm/kind-action@v1.5.0
-        if: steps.modified-files.outputs.any_modified == 'true'
-        with:
-          wait: 300s
-          node_image: ${{ matrix.version }}
-
-      - name: Start traffic
-        if: steps.modified-files.outputs.any_modified == 'true'
+      - name: Expose metrics service
         run: |
-          helm install traffic charts/traffic -f charts/traffic/testdata/values.yaml
+          kubectl port-forward service/iter8 8080:8080 &
 
-      - name: Check traffic
-        if: steps.modified-files.outputs.any_modified == 'true'
+      - name: Check GET /grpcDashboard
         run: |
-          kubectl rollout status --watch --timeout=60s statefulset.apps/traffic
+          curl "http://localhost:8080/grpcDashboard?namespace=default&experiment=default" -f
\ No newline at end of file
diff --git a/.github/workflows/testcode.yaml b/.github/workflows/testcode.yaml
new file mode 100644
index 000000000..d13bd2a82
--- /dev/null
+++ b/.github/workflows/testcode.yaml
@@ -0,0 +1,129 @@
+name: Check changes to golang code do not break CLI
+
+# Only runs when golang code or test cases have changed
+
+# Only tests that the iter8 CLI works after golang code changes
+
+on:
+  pull_request:
+    branches:
+      - master
+    paths:
+      - '**.go'
+      - 'testdata/**'
+
+jobs:
+  unit-test:
+    name: unit-test
+    runs-on: ubuntu-latest
+    steps:
+      - name: Install Go
+        uses: actions/setup-go@v4
+        with:
+          go-version: 1.19
+      - name: Check out code into the Go module directory
+        uses: actions/checkout@v3
+      - name: Test and compute coverage
+        run: make coverage # includes vet and lint
+      - name: Enforce coverage
+        run: |
+          export COVERAGE=$(go tool cover -func coverage.out | grep total | awk '{print substr($3, 1, length($3)-1)}')
+          echo "code coverage is at ${COVERAGE}"
+          if [ 1 -eq "$(echo "${COVERAGE} > 76.0" | bc)" ]; then \
+            echo "all good... coverage is above 76.0%";
+          else \
+            echo "not good... coverage is not above 76.0%";
+            exit 1
+          fi
+
+  cli-test-http:
+    name: CLI test with http task
+    runs-on: ubuntu-latest
+    steps:
+      - name: Install Go
+        uses: actions/setup-go@v4
+        with:
+          go-version: 1.19
+      - name: Check out code into the Go module directory
+        uses: actions/checkout@v3
+      - name: Build and install Iter8
+        run: make install
+      - name: Start kind cluster
+        uses: helm/kind-action@v1.5.0
+        with:
+          wait: 300s
+
+      # # No need to create httpbin application because only testing CLI
+      #
+      # - name: Create httpbin application
+      #   run: |
+      #     kubectl create deploy httpbin --image=kennethreitz/httpbin --port=80
+      #     kubectl expose deploy httpbin --port=80
+
+      - name: CLI test with http task
+        run: |
+          iter8 k launch \
+            --set "tasks={ready,http}" \
+            --set ready.deploy=httpbin \
+            --set ready.service=httpbin \
+            --set ready.timeout=60s \
+            --set http.url=http://httpbin.default/get \
+            --set http.duration="3s"
+          sleep 60
+
+      - name: Try other iter8 k commands
+        run: |
+          iter8 k log
+          iter8 k delete
+
+  cli-test-grpc:
+    name: CLI test with grpc task
+    runs-on: ubuntu-latest
+    steps:
+      - name: Install Go
+        uses: actions/setup-go@v4
+        with:
+          go-version: 1.19
+      - name: Check out code into the Go module directory
+        uses: actions/checkout@v3
+      - name: Build and install Iter8
+        run: make install
+
+      - name: Start kind cluster
+        uses: helm/kind-action@v1.5.0
+        with:
+          wait: 300s
+
+      # # No need to create routeguide application because only testing CLI
+      #
+      # - name: Create routeguide application
+      #   run: |
+      #     kubectl create deployment routeguide --image=golang --port=50051 \
+      #     -- bash -c "git clone -b v1.52.0 --depth 1 https://github.com/grpc/grpc-go; cd grpc-go/examples/route_guide; sed -i "''" "'"s/localhost//"'" server/server.go; go run server/server.go"
+      #     kubectl expose deployment routeguide --port=50051
+      #     kubectl wait --for=condition=available --timeout=60s deployment/routeguide
+
+      # - name: Test gRPC service with grpcurl
+      #   run: |
+      #     curl -sO https://gist.githubusercontent.com/kalantar/510737f0fd58c0432a08e5b6e45ec97f/raw/524d6660284bf653ce0f29f3a25ed0e913c3df80/grpcurl-routeguide.yaml
+      #     kubectl apply -f grpcurl-routeguide.yaml
+      #     sleep 180
+      #     kubectl logs deploy/sleep
+
+      - name: CLI test with grpc task
+        run: |
+          iter8 k launch \
+            --set "tasks={ready,grpc}" \
+            --set ready.deploy=routeguide \
+            --set ready.service=routeguide \
+            --set ready.timeout=60s \
+            --set grpc.host=routeguide.default:50051 \
+            --set grpc.protoURL=https://raw.githubusercontent.com/grpc/grpc-go/v1.52.0/examples/route_guide/routeguide/route_guide.proto \
+            --set grpc.call=routeguide.RouteGuide.GetFeature \
+            --set grpc.dataURL=https://raw.githubusercontent.com/iter8-tools/docs/v0.13.13/samples/grpc-payload/unary.json
+          sleep 60
+
+      - name: Try other iter8 k commands
+        run: |
+          iter8 k log
+          iter8 k delete
\ No newline at end of file
diff --git a/.github/workflows/testkustomize.yaml b/.github/workflows/testkustomize.yaml
index a7adac5ea..ceb29602c 100644
--- a/.github/workflows/testkustomize.yaml
+++ b/.github/workflows/testkustomize.yaml
@@ -1,7 +1,16 @@
-name: Test kustomize experiments
+name: Check changes to kustomize do not break user experience
+
+# Only runs when kustomize has changed
+
+# Test changes to kustomize against released images
+# TODO: test if changes to charts have been replicated for kustomize
 
 on:
   pull_request:
+    branches:
+      - master
+    paths:
+      - kustomize/**
 
 # Kind versions used to test Iter8 on different versions of Kubernetes
 # From: https://github.com/kubernetes-sigs/kind/releases
@@ -35,8 +44,10 @@ jobs:
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
 
-  traffic:
-    name: Traffic test
+# TODO: add check to verify when a change is made to charts, a similar change is made to kustomize
+
+  controller-namespaceScoped:
+    name: Controller namespace scoped test
     needs: get_versions
     runs-on: ubuntu-latest
     strategy:
@@ -47,11 +58,11 @@ jobs:
       - name: Check out code
         uses: actions/checkout@v3
 
-      - name: Get modified files in the charts/traffic folder
+      - name: Get modified files in the kustomize/controller folder
         id: modified-files
         uses: tj-actions/changed-files@v35
         with:
-          files: charts/traffic
+          files: kustomize/controller
 
       - name: Start kind cluster ${{ matrix.version }}
         uses: helm/kind-action@v1.5.0
@@ -60,18 +71,18 @@ jobs:
           wait: 300s
           node_image: ${{ matrix.version }}
 
-      - name: Start traffic
+      - name: Start controller
         if: steps.modified-files.outputs.any_modified == 'true'
         run: |
-          kubectl apply -k kustomize/traffic/namespaceScoped
+          kubectl apply -k kustomize/controller/namespaceScoped
 
-      - name: Check traffic
+      - name: Check controller
         if: steps.modified-files.outputs.any_modified == 'true'
         run: |
-          kubectl rollout status --watch --timeout=60s statefulset.apps/iter8-traffic
+          kubectl rollout status --watch --timeout=60s statefulset.apps/iter8
 
-  traffic-clusterScoped:
-    name: Traffic cluster scoped test
+  controller-clusterScoped:
+    name: Controller cluster scoped test
     needs: get_versions
     runs-on: ubuntu-latest
     strategy:
@@ -82,11 +93,11 @@ jobs:
       - name: Check out code
         uses: actions/checkout@v3
 
-      - name: Get modified files in the charts/traffic folder
+      - name: Get modified files in the kustomize/controller folder
         id: modified-files
         uses: tj-actions/changed-files@v35
         with:
-          files: charts/traffic
+          files: kustomize/controller
 
       - name: Start kind cluster ${{ matrix.version }}
         uses: helm/kind-action@v1.5.0
@@ -95,12 +106,12 @@ jobs:
           wait: 300s
           node_image: ${{ matrix.version }}
 
-      - name: Start traffic
+      - name: Start controller
        if: steps.modified-files.outputs.any_modified == 'true'
         run: |
-          kubectl apply -k kustomize/traffic/clusterScoped
+          kubectl apply -k kustomize/controller/clusterScoped
 
-      - name: Check traffic
+      - name: Check controller
         if: steps.modified-files.outputs.any_modified == 'true'
         run: |
-          kubectl rollout status --watch --timeout=60s statefulset.apps/iter8-traffic
\ No newline at end of file
+          kubectl rollout status --watch --timeout=60s statefulset.apps/iter8
\ No newline at end of file
diff --git a/.github/workflows/testperformance.yaml b/.github/workflows/testperformance.yaml
deleted file mode 100644
index af5b6813d..000000000
--- a/.github/workflows/testperformance.yaml
+++ /dev/null
@@ -1,253 +0,0 @@
-name: Performance tests to assess the functionality of the latest version of Iter8 (master branch)
-
-on:
-  pull_request:
-
-jobs:
-  unit-test:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Install Go
-        uses: actions/setup-go@v4
-        with:
-          go-version: 1.19
-      - name: Check out code into the Go module directory
-        uses: actions/checkout@v3
-      - name: Test and compute coverage
-        run: make coverage # includes vet and lint
-      - name: Enforce coverage
-        run: |
-          export COVERAGE=$(go tool cover -func coverage.out | grep total | awk '{print substr($3, 1, length($3)-1)}')
-          echo "code coverage is at ${COVERAGE}"
-          if [ 1 -eq "$(echo "${COVERAGE} > 76.0" | bc)" ]; then \
-            echo "all good... coverage is above 76.0%";
-          else \
-            echo "not good... coverage is not above 76.0%";
-            exit 1
-          fi
-
-  kubernetes-load-test-http:
-    name: HTTP load test (with readiness) at the edge of Kubernetes
-    runs-on: ubuntu-latest
-    steps:
-      - name: Install Go
-        uses: actions/setup-go@v4
-        with:
-          go-version: 1.19
-      - name: Check out code into the Go module directory
-        uses: actions/checkout@v3
-      - name: Build and install Iter8
-        run: make install
-      - name: Start kind cluster
-        uses: helm/kind-action@v1.5.0
-        with:
-          wait: 300s
-
-      - name: run httpbin
-        run: |
-          set -e
-          kubectl create deploy httpbin --image=kennethreitz/httpbin --port=80
-          kubectl expose deploy httpbin --port=80
-
-      - name: load-test-http in Kubernetes
-        run: |
-          iter8 k launch --localChart --chartName charts/iter8 \
-            --set "tasks={ready,http,assess}" \
-            --set ready.deploy=httpbin \
-            --set ready.service=httpbin \
-            --set ready.timeout=60s \
-            --set http.url=http://httpbin.default/get \
-            --set http.duration="3s" \
-            --set assess.SLOs.upper.http/latency-p50=5 \
-            --set assess.SLOs.upper.http/latency-p90=10 \
-            --set assess.SLOs.upper.http/error-count=0 \
-            --set runner=job
-          iter8 k assert -c completed -c nofailure -c slos --timeout 60s
-          iter8 k report
-          iter8 k log
-          iter8 k delete
-
-      - name: load-test-http with payload in Kubernetes
-        run: |
-          iter8 k launch --localChart --chartName charts/iter8 \
-            --set "tasks={ready,http,assess}" \
-            --set ready.deploy=httpbin \
-            --set ready.service=httpbin \
-            --set ready.timeout=60s \
-            --set http.url=http://httpbin.default/post \
-            --set http.payloadStr=hello \
-            --set http.duration="3s" \
-            --set assess.SLOs.upper.http/latency-p50=5 \
-            --set assess.SLOs.upper.http/latency-p90=10 \
-            --set assess.SLOs.upper.http/error-count=0 \
-            --set runner=job
-          iter8 k assert -c completed -c nofailure -c slos --timeout 60s
-          iter8 k report
-          iter8 k log
-          iter8 k delete
-
-      - name: load-test-http with multiple endpoints in Kubernetes
-        run: |
-          iter8 k launch --localChart --chartName charts/iter8 \
-            --set "tasks={ready,http,assess}" \
-            --set ready.deploy=httpbin \
-            --set ready.service=httpbin \
-            --set ready.timeout=60s \
-            --set http.endpoints.get.url=http://httpbin.default/get \
-            --set http.endpoints.getAnything.url=http://httpbin.default/anything \
-            --set http.endpoints.post.url=http://httpbin.default/post \
-            --set http.endpoints.post.payloadStr=hello \
-            --set http.duration="3s" \
-            --set assess.SLOs.upper.http-get/error-count=0 \
-            --set assess.SLOs.upper.http-get/latency-mean=50 \
-            --set assess.SLOs.upper.http-getAnything/error-count=0 \
-            --set assess.SLOs.upper.http-getAnything/latency-mean=100 \
-            --set assess.SLOs.upper.http-post/error-count=0 \
-            --set assess.SLOs.upper.http-post/latency-mean=150 \
-            --set runner=job
-          iter8 k assert -c completed -c nofailure -c slos --timeout 300s
-          iter8 k report
-          iter8 k log
-          iter8 k delete
-
-  kubernetes-load-test-grpc:
-    name: gRPC load test with various URLs
-    runs-on: ubuntu-latest
-    steps:
-      - name: Install Go
-        uses: actions/setup-go@v4
-        with:
-          go-version: 1.19
-      - name: Check out code into the Go module directory
-        uses: actions/checkout@v3
-      - name: Build and install Iter8
-        run: make install
-
-      - name: Start kind cluster
-        uses: helm/kind-action@v1.5.0
-        with:
-          wait: 300s
-
-      - name: Deploy gRPC service in the Kubernetes cluster
-        run: |
-          kubectl create deployment routeguide --image=golang --port=50051 \
-          -- bash -c "git clone -b v1.52.0 --depth 1 https://github.com/grpc/grpc-go; cd grpc-go/examples/route_guide; sed -i "''" "'"s/localhost//"'" server/server.go; go run server/server.go"
-          kubectl expose deployment routeguide --port=50051
-          kubectl wait --for=condition=available --timeout=60s deployment/routeguide
-
-      - name: Test gRPC service with grpcurl
-        run: |
-          curl -sO https://gist.githubusercontent.com/kalantar/510737f0fd58c0432a08e5b6e45ec97f/raw/524d6660284bf653ce0f29f3a25ed0e913c3df80/grpcurl-routeguide.yaml
-          kubectl apply -f grpcurl-routeguide.yaml
-          sleep 180
-          kubectl logs deploy/sleep
-
-      - name: load test grpc service
-        run: |
-          iter8 k launch --localChart --chartName charts/iter8 \
-            --set "tasks={ready,grpc,assess}" \
-            --set ready.deploy=routeguide \
-            --set ready.service=routeguide \
-            --set ready.timeout=60s \
-            --set grpc.host=routeguide.default:50051 \
-            --set grpc.protoURL=https://raw.githubusercontent.com/grpc/grpc-go/v1.52.0/examples/route_guide/routeguide/route_guide.proto \
-            --set grpc.call=routeguide.RouteGuide.GetFeature \
-            --set grpc.dataURL=https://raw.githubusercontent.com/iter8-tools/docs/v0.13.13/samples/grpc-payload/unary.json \
-            --set assess.SLOs.upper.grpc/error-rate=0 \
-            --set assess.SLOs.upper.grpc/latency/mean=200 \
-            --set assess.SLOs.upper.grpc/latency/p'97\.5'=800 \
-            --set runner=job
-          iter8 k assert -c completed -c nofailure -c slos --timeout 300s
-          iter8 k report
-          iter8 k log
-          iter8 k delete
-
-      - name: load test grpc service with multiple endpoints
-        run: |
-          iter8 k launch --localChart --chartName charts/iter8 \
-            --set "tasks={ready,grpc,assess}" \
-            --set ready.deploy=routeguide \
-            --set ready.service=routeguide \
-            --set ready.timeout=60s \
-            --set grpc.host=routeguide.default:50051 \
-            --set grpc.protoURL=https://raw.githubusercontent.com/grpc/grpc-go/v1.52.0/examples/route_guide/routeguide/route_guide.proto \
-            --set grpc.endpoints.getFeature.call=routeguide.RouteGuide.GetFeature \
-            --set grpc.endpoints.getFeature.dataURL=https://raw.githubusercontent.com/iter8-tools/docs/v0.13.13/samples/grpc-payload/unary.json \
-            --set grpc.endpoints.listFeatures.call=routeguide.RouteGuide.ListFeatures \
-            --set grpc.endpoints.listFeatures.dataURL=https://raw.githubusercontent.com/iter8-tools/docs/v0.13.13/samples/grpc-payload/server.json \
-            --set assess.SLOs.upper.grpc-getFeature/error-rate=0 \
-            --set assess.SLOs.upper.grpc-getFeature/latency/mean=50 \
-            --set assess.SLOs.upper.grpc-listFeatures/error-rate=0 \
-            --set assess.SLOs.upper.grpc-listFeatures/latency/mean=100 \
-            --set runner=job
-          iter8 k assert -c completed -c nofailure -c slos --timeout 300s
-          iter8 k report
-          iter8 k log
-          iter8 k delete
-
-  kubernetes-load-test-grpc2:
-    name: gRPC load test 2 with various URLs
-    runs-on: ubuntu-latest
-    steps:
-      - name: Install Go
-        uses: actions/setup-go@v4
-        with:
-          go-version: 1.19
-      - name: Check out code into the Go module directory
-        uses: actions/checkout@v3
-      - name: Build and install Iter8
-        run: make install
-
-      - name: Start kind cluster
-        uses: helm/kind-action@v1.5.0
-        with:
-          wait: 300s
-
-      - name: Deploy gRPC service in the Kubernetes cluster
-        run: |
-          kubectl create deploy hello --image=docker.io/grpc/java-example-hostname:latest --port=50051
-          kubectl expose deploy hello --port=50051
-
-      - name: load test grpc service with protoURL
-        run: |
-          iter8 k launch --localChart --chartName charts/iter8 \
-            --set "tasks={ready,grpc,assess}" \
-            --set ready.deploy=hello \
-            --set ready.service=hello \
-            --set ready.timeout=60s \
-            --set grpc.host="hello.default:50051" \
-            --set grpc.call="helloworld.Greeter.SayHello" \
grpc.protoURL="https://raw.githubusercontent.com/grpc/grpc-go/master/examples/helloworld/helloworld/helloworld.proto" \ - --set grpc.data.name="frodo" \ - --set assess.SLOs.upper.grpc/error-rate=0 \ - --set assess.SLOs.upper.grpc/latency/mean=200 \ - --set assess.SLOs.upper.grpc/latency/p90=500 \ - --set assess.SLOs.upper.grpc/latency/p'97\.5'=600 \ - --set runner=job - iter8 k assert -c completed -c nofailure -c slos --timeout 60s - iter8 k report - iter8 k log - iter8 k delete - - - name: load test grpc service with proto/data/metadata URLs - run: | - iter8 k launch --localChart -l trace \ - --chartName charts/iter8 \ - --set "tasks={ready,grpc,assess}" \ - --set ready.deploy=hello \ - --set ready.service=hello \ - --set ready.timeout=60s \ - --set grpc.host="hello.default:50051" \ - --set grpc.call="helloworld.Greeter.SayHello" \ - --set grpc.protoURL="https://raw.githubusercontent.com/grpc/grpc-go/master/examples/helloworld/helloworld/helloworld.proto" \ - --set grpc.dataURL="https://gist.githubusercontent.com/sriumcp/3f3178f4b698af6696c925832e51b0ba/raw/d02aa698d34aa2067f7a2f6afb4ceb616b0db822/name.json" \ - --set grpc.metadataURL="https://gist.githubusercontent.com/sriumcp/3f3178f4b698af6696c925832e51b0ba/raw/d02aa698d34aa2067f7a2f6afb4ceb616b0db822/name.json" \ - --set assess.SLOs.upper.grpc/error-rate=0 \ - --set assess.SLOs.upper.grpc/latency/mean=200 \ - --set assess.SLOs.upper.grpc/latency/p90=500 \ - --set assess.SLOs.upper.grpc/latency/p'97\.5'=600 \ - --set runner=job - iter8 k assert -c completed -c nofailure -c slos --timeout 60s - iter8 k report - iter8 k log - iter8 k delete \ No newline at end of file diff --git a/.github/workflows/versionbump.yaml b/.github/workflows/versionbump.yaml index a8182f70d..9901e1d8f 100644 --- a/.github/workflows/versionbump.yaml +++ b/.github/workflows/versionbump.yaml @@ -1,9 +1,15 @@ name: Version bump check +# Only runs when charts have changed + +# Check if the version number of changed charts have been bumped + on: pull_request: branches: - master + paths: + - charts/** jobs: # Get the paths for the Helm charts to version check diff --git a/ADOPTERS.md b/ADOPTERS.md index 3490f9db2..73fdca3c3 100644 --- a/ADOPTERS.md +++ b/ADOPTERS.md @@ -7,6 +7,6 @@ If you are starting to use Iter8, we would love to see you in the list below. Pl | IBM Cloud (DevOps Toolchains) | [Michael Kalantar](https://github.com/kalantar), [Srinivasan Parthasarathy](https://github.com/sriumcp) | | IBM Research Cloud Innovation Lab | [Atin Sood](https://github.com/atinsood)| | IBM Cloud (Code Engine) | [Doug Davis](https://github.com/duglin) | -| ChaosNative(LitmusChaos) | [Shubham Chaudhary](https://github.com/ispeakc0de) | +| ChaosNative (LitmusChaos) | [Shubham Chaudhary](https://github.com/ispeakc0de) | | Seldon Core | [Clive Cox](https://github.com/cliveseldon) | | Datagrate, Inc. 
(jetic.io) | [Mert Öztürk](https://github.com/mertdotcc) |
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 6542f07ce..bd37f6a99 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,7 +6,7 @@ RUN apt-get update && apt-get install -y curl
 # Set Iter8 version from build args
 ARG TAG
-ENV TAG=${TAG:-v0.15.0}
+ENV TAG=${TAG:-v0.16.0}
 
 # Download iter8 compressed binary
 RUN curl -LO https://github.com/iter8-tools/iter8/releases/download/${TAG}/iter8-linux-amd64.tar.gz
diff --git a/README.md b/README.md
index 2b47808ac..3d289c994 100644
--- a/README.md
+++ b/README.md
@@ -7,9 +7,10 @@ Iter8 is the Kubernetes release optimizer built for DevOps, MLOps, SRE and data
 Iter8 supports the following use-cases.
 
-1. Performance testing and SLO validation of HTTP services.
-2. Performance testing and SLO validation of gRPC services.
-3. SLO validation using custom metrics from any database(s) or REST API(s).
+1. Performance testing of HTTP services.
+2. Performance testing of gRPC services.
+3. A/B/n testing of applications and ML models.
+4. Reliable and automated traffic engineering: blue-green and canary.
 
 ## :rocket: Iter8 experiment
@@ -22,13 +23,10 @@ Iter8 introduces the notion of an experiment, which is a list of configurable ta
 Iter8 packs a number of powerful features that facilitate Kubernetes app testing and experimentation. They include the following.
 
 1. **Generating load and collecting built-in metrics for HTTP and gRPC services.** Simplifies performance testing by eliminating the need to setup and use metrics databases.
-2. **Well-defined notion of service-level objectives (SLOs).** Makes it simple to define and verify SLOs in experiments.
-3. **Custom metrics.** Enables the use of custom metrics from any database(s) or REST API(s) in experiments.
-4. **Readiness check.** The performance testing portion of the experiment begins only after the service is ready.
-5. **HTML/text reports.** Promotes human understanding of experiment results through visual insights.
-6. **Assertions.** Verifies whether the target app satisfies the specified SLOs or not after an experiment. Simplifies automation in CI/CD/GitOps pipelines: branch off into different paths depending upon whether the assertions are true or false.
-7. **Multi-loop experiments.** Experiment tasks can be executed periodically (multi-loop) instead of just once (single-loop). This enables Iter8 to refresh metric values and perform SLO validation using the latest metric values during each loop.
-8. **Experiment anywhere.** Iter8 experiments can be launched inside a Kubernetes cluster, in local environments, or inside a GitHub Actions pipeline.
+2. **Readiness check.** The performance testing portion of the experiment begins only after the service is ready.
+3. **Experiment anywhere.** Iter8 experiments can be launched inside a Kubernetes cluster, in local environments, or inside a GitHub Actions pipeline.
+4. **Traffic controller.** Automatically and dynamically reconfigures routing resources based on the state of Kubernetes apps/ML models.
+5. **Client-side SDK.** Facilitates routing and metrics collection tasks associated with distributed (i.e., client-server architecture-based) A/B/n testing in Kubernetes.
 
 Please see [https://iter8.tools](https://iter8.tools) for the complete documentation.
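As a concrete illustration of the HTTP use-case above: the testperformance.yaml workflow removed earlier in this diff drove such tests end to end from the CLI. The sketch below distills its steps against an httpbin service; it assumes a running kind cluster, the httpbin deployment/service created as in that workflow, and an iter8 binary built via make install, with flags copied from those workflow steps. The assert and report steps at the end are the ones this change retires along with the action/report code below.

  # sketch distilled from the deleted testperformance.yaml workflow
  kubectl create deploy httpbin --image=kennethreitz/httpbin --port=80
  kubectl expose deploy httpbin --port=80

  # launch an experiment: readiness check, then a 3s HTTP load test with SLOs
  iter8 k launch --localChart --chartName charts/iter8 \
    --set "tasks={ready,http,assess}" \
    --set ready.deploy=httpbin \
    --set ready.service=httpbin \
    --set ready.timeout=60s \
    --set http.url=http://httpbin.default/get \
    --set http.duration="3s" \
    --set assess.SLOs.upper.http/latency-p50=5 \
    --set assess.SLOs.upper.http/error-count=0 \
    --set runner=job

  # verify, inspect, and clean up (assert/report are removed by this change)
  iter8 k assert -c completed -c nofailure -c slos --timeout 60s
  iter8 k report
  iter8 k log
  iter8 k delete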
diff --git a/abn/service.go b/abn/service.go index 1fc39ce89..844e87f4d 100644 --- a/abn/service.go +++ b/abn/service.go @@ -24,8 +24,11 @@ import ( ) const ( - // metricsDirEnv is the environment variable identifying the directory with metrics storage - metricsDirEnv = "METRICS_DIR" + // MetricsDirEnv is the environment variable identifying the directory with metrics storage + MetricsDirEnv = "METRICS_DIR" + + configEnv = "ABN_CONFIG_FILE" + defaultPortNumber = 50051 ) var ( @@ -85,11 +88,6 @@ func (server *abnServer) WriteMetric(ctx context.Context, metricMsg *pb.MetricVa ) } -const ( - configEnv = "ABN_CONFIG_FILE" - defaultPortNumber = 50051 -) - // abnConfig defines the configuration of the controllers type abnConfig struct { // Port is port number on which the abn gRPC service should listen @@ -119,8 +117,8 @@ func LaunchGRPCServer(opts []grpc.ServerOption, stopCh <-chan struct{}) error { grpcServer := grpc.NewServer(opts...) pb.RegisterABNServer(grpcServer, newServer()) - // configure metricsClient if needed - MetricsClient, err = badgerdb.GetClient(badger.DefaultOptions(os.Getenv(metricsDirEnv)), badgerdb.AdditionalOptions{}) + // configure MetricsClient if needed + MetricsClient, err = badgerdb.GetClient(badger.DefaultOptions(os.Getenv(MetricsDirEnv)), badgerdb.AdditionalOptions{}) if err != nil { log.Logger.Error("Unable to configure metrics storage client ", err) return err diff --git a/abn/service_impl.go b/abn/service_impl.go index 5691fe453..2359e24c3 100644 --- a/abn/service_impl.go +++ b/abn/service_impl.go @@ -47,6 +47,9 @@ func lookupInternal(application string, user string) (controllers.RoutemapInterf } // record user; ignore error if any; this is best effort + if MetricsClient == nil { + return nil, invalidVersion, fmt.Errorf("no metrics client") + } _ = MetricsClient.SetUser(application, versionNumber, *s.GetVersions()[versionNumber].GetSignature(), user) return s, versionNumber, nil @@ -131,6 +134,9 @@ func writeMetricInternal(application, user, metric, valueStr string) error { v := s.GetVersions()[versionNumber] transaction := uuid.NewString() + if MetricsClient == nil { + return fmt.Errorf("no metrics client") + } err = MetricsClient.SetMetric( s.GetNamespace()+"/"+s.GetName(), versionNumber, *v.GetSignature(), metric, user, transaction, diff --git a/abn/service_test.go b/abn/service_test.go index 78fd5f9f8..89905f704 100644 --- a/abn/service_test.go +++ b/abn/service_test.go @@ -212,16 +212,24 @@ func getMetricsCount(t *testing.T, namespace string, name string, version int, m if rm == nil || reflect.ValueOf(rm).IsNil() { return 0 } + assert.Less(t, version, len(rm.GetVersions())) v := rm.GetVersions()[version] signature := v.GetSignature() + if nil == signature { return 0 } + + // TODO: better error handling when there is no metrics client + if MetricsClient == nil { + return 0 + } versionmetrics, err := MetricsClient.GetMetrics(namespace+"/"+name, version, *signature) if err != nil { return 0 } + metrics, ok := (*versionmetrics)[metric] if !ok { return 0 @@ -236,7 +244,7 @@ func TestLaunchGRPCServer(t *testing.T) { defer cancel() // define METRICS_DIR - err := os.Setenv(metricsDirEnv, t.TempDir()) + err := os.Setenv(MetricsDirEnv, t.TempDir()) assert.NoError(t, err) configFile := filepath.Clean(util.CompletePath("../testdata", "abninputs/config.yaml")) diff --git a/action/assert.go b/action/assert.go deleted file mode 100644 index 66f69a1e2..000000000 --- a/action/assert.go +++ /dev/null @@ -1,122 +0,0 @@ -package action - -import ( - "fmt" - "strings" - "time" - 
- "github.com/iter8-tools/iter8/base" - "github.com/iter8-tools/iter8/base/log" - "github.com/iter8-tools/iter8/driver" -) - -const ( - // Completed states that the experiment is complete - Completed = "completed" - // NoFailure states that none of the tasks in the experiment have failed - NoFailure = "nofailure" - // SLOs states that all app versions participating in the experiment satisfy SLOs - SLOs = "slos" -) - -// AssertOpts are the options used for asserting experiment results -type AssertOpts struct { - // Timeout is the duration to wait for conditions to be satisfied - Timeout time.Duration - // Conditions are checked by assert - Conditions []string - // RunOpts provides options relating to experiment resources - RunOpts -} - -// NewAssertOpts initializes and returns assert opts -func NewAssertOpts(kd *driver.KubeDriver) *AssertOpts { - return &AssertOpts{ - RunOpts: *NewRunOpts(kd), - } -} - -// KubeRun asserts conditions for a Kubernetes experiment -func (aOpts *AssertOpts) KubeRun() (bool, error) { - if err := aOpts.KubeDriver.Init(); err != nil { - return false, err - } - - return aOpts.Run(aOpts.KubeDriver) -} - -// Run builds the experiment and verifies assert conditions -func (aOpts *AssertOpts) Run(eio base.Driver) (bool, error) { - allGood, err := aOpts.verify(eio) - if err != nil { - return false, err - } - if !allGood { - log.Logger.Error("assert conditions failed") - return false, nil - } - return true, nil -} - -// verify implements the core logic of assert -func (aOpts *AssertOpts) verify(eio base.Driver) (bool, error) { - // timeSpent tracks how much time has been spent so far in assert attempts - var timeSpent, _ = time.ParseDuration("0s") - - // sleepTime specifies how long to sleep in between retries of asserts - var sleepTime, _ = time.ParseDuration("3s") - - // check assert conditions - for { - exp, err := base.BuildExperiment(eio) - if err != nil { - return false, err - } - - allGood := true - - for _, cond := range aOpts.Conditions { - if strings.ToLower(cond) == Completed { - c := exp.Completed() - allGood = allGood && c - if c { - log.Logger.Info("experiment completed") - } else { - log.Logger.Info("experiment did not complete") - } - } else if strings.ToLower(cond) == NoFailure { - nf := exp.NoFailure() - allGood = allGood && nf - if nf { - log.Logger.Info("experiment has no failure") - } else { - log.Logger.Info("experiment failed") - } - } else if strings.ToLower(cond) == SLOs { - slos := exp.SLOs() - allGood = allGood && slos - if slos { - log.Logger.Info("SLOs are satisfied") - } else { - log.Logger.Info("SLOs are not satisfied") - } - } else { - log.Logger.Error("unsupported assert condition detected; ", cond) - return false, fmt.Errorf("unsupported assert condition detected; %v", cond) - } - } - - if allGood { - log.Logger.Info("all conditions were satisfied") - return true, nil - } - if timeSpent >= aOpts.Timeout { - log.Logger.Info("not all conditions were satisfied") - return false, nil - } - log.Logger.Infof("sleeping %v ................................", sleepTime) - time.Sleep(sleepTime) - timeSpent += sleepTime - } - -} diff --git a/action/assert_test.go b/action/assert_test.go deleted file mode 100644 index 54d7cdc66..000000000 --- a/action/assert_test.go +++ /dev/null @@ -1,34 +0,0 @@ -package action - -import ( - "context" - "os" - "testing" - - "github.com/iter8-tools/iter8/base" - "github.com/iter8-tools/iter8/driver" - "github.com/stretchr/testify/assert" - "helm.sh/helm/v3/pkg/cli" - corev1 "k8s.io/api/core/v1" - metav1 
"k8s.io/apimachinery/pkg/apis/meta/v1" -) - -func TestKubeAssert(t *testing.T) { - _ = os.Chdir(t.TempDir()) - // fix aOpts - aOpts := NewAssertOpts(driver.NewFakeKubeDriver(cli.New())) - aOpts.Conditions = []string{Completed, NoFailure, SLOs} - - byteArray, _ := os.ReadFile(base.CompletePath("../testdata/assertinputs", driver.ExperimentPath)) - _, _ = aOpts.Clientset.CoreV1().Secrets("default").Create(context.TODO(), &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default", - Namespace: "default", - }, - StringData: map[string]string{driver.ExperimentPath: string(byteArray)}, - }, metav1.CreateOptions{}) - - ok, err := aOpts.KubeRun() - assert.True(t, ok) - assert.NoError(t, err) -} diff --git a/action/doc.go b/action/doc.go index 6dbc69a6e..a78b33500 100644 --- a/action/doc.go +++ b/action/doc.go @@ -1,6 +1,5 @@ // Package action contains the logic for each action that Iter8 can perform. // -// This is a library for calling top-level Iter8 actions like 'launch', -// 'assert', or 'report'. Actions approximately match the command line -// invocations that the Iter8 CLI uses. +// This is a library for calling top-level Iter8 actions like 'launch' and 'assert'. +// Actions approximately match the command line invocations that the Iter8 CLI uses. package action diff --git a/action/launch_test.go b/action/launch_test.go index 62596acbb..c5705a3ea 100644 --- a/action/launch_test.go +++ b/action/launch_test.go @@ -1,6 +1,7 @@ package action import ( + "fmt" "os" "testing" @@ -16,11 +17,14 @@ func TestKubeLaunch(t *testing.T) { // fix lOpts lOpts := NewLaunchOpts(driver.NewFakeKubeDriver(cli.New())) - lOpts.Values = []string{"tasks={http}", "http.url=https://httpbin.org/get", "http.duration=2s", "runner=job"} + lOpts.Values = []string{"tasks={http}", "http.url=https://httpbin.org/get", "http.duration=2s"} err = lOpts.KubeRun() assert.NoError(t, err) + fmt.Println(lOpts.Group) + fmt.Println(lOpts.Releases) + rel, err := lOpts.Releases.Last(lOpts.Group) assert.NotNil(t, rel) assert.Equal(t, 1, rel.Version) @@ -35,7 +39,7 @@ func TestKubeLaunchLocalChart(t *testing.T) { lOpts := NewLaunchOpts(driver.NewFakeKubeDriver(cli.New())) lOpts.ChartName = base.CompletePath("../charts", "iter8") lOpts.LocalChart = true - lOpts.Values = []string{"tasks={http}", "http.url=https://httpbin.org/get", "http.duration=2s", "runner=job"} + lOpts.Values = []string{"tasks={http}", "http.url=https://httpbin.org/get", "http.duration=2s"} err = lOpts.KubeRun() assert.NoError(t, err) diff --git a/action/report.go b/action/report.go deleted file mode 100644 index dc2c0061e..000000000 --- a/action/report.go +++ /dev/null @@ -1,78 +0,0 @@ -package action - -import ( - "fmt" - "io" - "strings" - - "github.com/iter8-tools/iter8/action/report" - "github.com/iter8-tools/iter8/base" - "github.com/iter8-tools/iter8/base/log" - "github.com/iter8-tools/iter8/driver" -) - -const ( - // TextOutputFormatKey is the output format used to create text output - TextOutputFormatKey = "text" - - // HTMLOutputFormatKey is the output format used to create html output - HTMLOutputFormatKey = "html" -) - -// ReportOpts are the options used for generating reports from experiment result -type ReportOpts struct { - // OutputFormat specifies the output format to be used by report - OutputFormat string - // RunOpts enables fetching local experiment spec and result - RunOpts - // KubeDriver enables fetching Kubernetes experiment spec and result - *driver.KubeDriver -} - -// NewReportOpts initializes and returns report opts -func 
NewReportOpts(kd *driver.KubeDriver) *ReportOpts { - return &ReportOpts{ - RunOpts: RunOpts{ - RunDir: ".", - }, - OutputFormat: TextOutputFormatKey, - KubeDriver: kd, - } -} - -// KubeRun generates report for a Kubernetes experiment -func (rOpts *ReportOpts) KubeRun(out io.Writer) error { - if err := rOpts.KubeDriver.Init(); err != nil { - return err - } - return rOpts.Run(rOpts, out) -} - -// Run generates the text or HTML report -func (rOpts *ReportOpts) Run(eio base.Driver, out io.Writer) error { - var e *base.Experiment - var err error - if e, err = base.BuildExperiment(eio); err != nil { - return err - } - switch strings.ToLower(rOpts.OutputFormat) { - case TextOutputFormatKey: - reporter := report.TextReporter{ - Reporter: &report.Reporter{ - Experiment: e, - }, - } - return reporter.Gen(out) - case HTMLOutputFormatKey: - reporter := report.HTMLReporter{ - Reporter: &report.Reporter{ - Experiment: e, - }, - } - return reporter.Gen(out) - default: - e := fmt.Errorf("unsupported report format %v", rOpts.OutputFormat) - log.Logger.Error(e) - return e - } -} diff --git a/action/report/common.go b/action/report/common.go deleted file mode 100644 index d0ca8552d..000000000 --- a/action/report/common.go +++ /dev/null @@ -1,137 +0,0 @@ -package report - -import ( - "fmt" - "sort" - - "github.com/iter8-tools/iter8/base" - "github.com/iter8-tools/iter8/base/log" -) - -// Reporter implements methods that are common to text and HTML reporting. -type Reporter struct { - // Experiment enables access to all base.Experiment data and methods - *base.Experiment -} - -// SortedScalarAndSLOMetrics extracts and sorts metric names from experiment. -// It looks for scalar metrics referenced in the MetricsInfo section, -// and also for scalar metrics referenced in SLOs. -func (r *Reporter) SortedScalarAndSLOMetrics() []string { - keys := []string{} - // add scalar and summary metrics referenced in MetricsInfo - for k, mm := range r.Result.Insights.MetricsInfo { - if mm.Type == base.CounterMetricType || mm.Type == base.GaugeMetricType { - keys = append(keys, k) - } - if mm.Type == base.SummaryMetricType { - for _, agg := range []base.AggregationType{ - base.CountAggregator, - base.MeanAggregator, - base.StdDevAggregator, - base.MinAggregator, - base.MaxAggregator} { - keys = append(keys, k+"/"+string(agg)) - } - } - } - // also add metrics referenced in SLOs - // only scalar metrics can feature in SLOs (for now) - if r.Result.Insights.SLOs != nil { - for _, v := range r.Result.Insights.SLOs.Upper { - nm, err := base.NormalizeMetricName(v.Metric) - if err == nil { - keys = append(keys, nm) - } - } - for _, v := range r.Result.Insights.SLOs.Lower { - nm, err := base.NormalizeMetricName(v.Metric) - if err == nil { - keys = append(keys, nm) - } - } - } - // remove duplicates - tmp := base.Uniq(keys) - uniqKeys := []string{} - for _, val := range tmp { - uniqKeys = append(uniqKeys, val.(string)) - } - // return sorted metrics - sort.Strings(uniqKeys) - return uniqKeys -} - -// ScalarMetricValueStr extracts value of a scalar metric (mn) for the given app version (j) -// Value is converted to string so that it can be printed in text and HTML reports. 
-func (r *Reporter) ScalarMetricValueStr(j int, mn string) string { - val := r.Result.Insights.ScalarMetricValue(j, mn) - if val != nil { - return fmt.Sprintf("%0.2f", *val) - } - return "unavailable" -} - -// MetricWithUnits provides the string representation of a metric name with units -func (r *Reporter) MetricWithUnits(metricName string) (string, error) { - in := r.Result.Insights - nm, err := base.NormalizeMetricName(metricName) - if err != nil { - return "", err - } - - m, err := in.GetMetricsInfo(nm) - if err != nil { - e := fmt.Errorf("unable to get metrics info for %v", nm) - log.Logger.Error(e) - return "", e - } - str := nm - if m.Units != nil { - str = fmt.Sprintf("%v (%v)", str, *m.Units) - } - return str, nil -} - -// GetBestVersions returns list of best versions for each metric -func (r *Reporter) GetBestVersions(metrics []string, in *base.Insights) []string { - results := make([]string, len(metrics)) - if in.Rewards == nil { - return results - } - - rewards := *in.Rewards - winners := *in.RewardsWinners - - for i, mn := range metrics { - j := indexString(rewards.Max, mn) - if j >= 0 { - if winners.Max[j] == -1 { - results[i] = "insufficient data" - } else { - results[i] = in.TrackVersionStr(winners.Max[j]) - } - } else { - j = indexString(rewards.Min, mn) - if j >= 0 { - if winners.Min[j] == -1 { - results[i] = "insufficient data" - } else { - results[i] = in.TrackVersionStr(winners.Min[j]) - } - } else { - results[i] = "n/a" - } - } - } - return results -} - -func indexString(keys []string, item string) int { - for i, key := range keys { - if key == item { - return i - } - } - return -1 -} diff --git a/action/report/doc.go b/action/report/doc.go deleted file mode 100644 index 124efe484..000000000 --- a/action/report/doc.go +++ /dev/null @@ -1,3 +0,0 @@ -// Package report contains primitives for reporting the results of an experiment. -// It supports text and HTML report formats. -package report diff --git a/action/report/htmlreport.tpl b/action/report/htmlreport.tpl deleted file mode 100644 index aecedee23..000000000 --- a/action/report/htmlreport.tpl +++ /dev/null @@ -1,202 +0,0 @@ - - - - - - - - - - - - Experiment Report - - - - - - - - - - - -
- [htmlreport.tpl: 202 deleted lines; the template's HTML markup was lost when this diff was flattened. Recoverable structure of the deleted HTML report: a page titled "Iter8 Experiment Report"; a "Service level objectives (SLOs)" section ("Whether or not SLOs are satisfied") rendering a table with an "SLO Conditions" column and either one column per version ({{ $.Result.Insights.TrackVersionStr . }}) or a single "Satisfied" column, listing upper SLOs as {{ $.MetricWithUnits $slo.Metric }} ≤ {{ $slo.Limit }} and lower SLOs as {{ $slo.Limit }} ≤ {{ $.MetricWithUnits $slo.Metric }}; a "Metric Histograms" section ranging over .SortedVectorMetrics; a "Latest observed values for metrics" table ranging over .SortedScalarAndSLOMetrics with per-version values from {{ $.ScalarMetricValueStr . $mn }} and, when rewards are defined, a "Best" column from {{ index $bestVersions $ind }}; and a fallback "Metrics-based Insights" section stating "Insights not found in experiment results. You may need to retry this report at a later time." when .Result.Insights is absent.]
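The 202-line template summarized above, together with textreport.tpl further below, backed the two output formats of the retired report action (ReportOpts.OutputFormat, "text" by default or "html"). As a rough sketch of how they were consumed from the CLI, per the deleted workflow steps, with the -o flag spelling an assumption inferred from that option:

  iter8 k report           # renders textreport.tpl
  iter8 k report -o html   # renders htmlreport.tpl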
- - diff --git a/action/report/htmlreporter.go b/action/report/htmlreporter.go deleted file mode 100644 index 241e81a50..000000000 --- a/action/report/htmlreporter.go +++ /dev/null @@ -1,174 +0,0 @@ -package report - -import ( - "bytes" - "errors" - "fmt" - "io" - "math/rand" - "sort" - - htmlT "html/template" - - _ "embed" - - "github.com/Masterminds/sprig" - "github.com/iter8-tools/iter8/base" - "github.com/iter8-tools/iter8/base/log" -) - -// HTMLReporter supports generation of HTML reports from experiments. -type HTMLReporter struct { - // Reporter enables access to all reporter data and methods - *Reporter -} - -// reportHTML is the HTML report template -// -//go:embed htmlreport.tpl -var reportHTML string - -// Gen creates an HTML report for a given experiment -func (ht *HTMLReporter) Gen(out io.Writer) error { - - // create HTML template - htpl, err := htmlT.New("report").Option("missingkey=error").Funcs(sprig.FuncMap()).Funcs(htmlT.FuncMap{ - "renderSLOSatisfiedHTML": renderSLOSatisfiedHTML, - "renderSLOSatisfiedCellClass": renderSLOSatisfiedCellClass, - }).Parse(reportHTML) - if err != nil { - e := errors.New("unable to parse HTML template") - log.Logger.WithStackTrace(err.Error()).Error(e) - return e - } - - var b bytes.Buffer - if err = htpl.Execute(&b, ht); err != nil { - e := errors.New("unable to execute template") - log.Logger.WithStackTrace(err.Error()).Error(e) - return e - } - - // print output - fmt.Fprintln(out, b.String()) - return nil -} - -// RenderStr is a helper method for rendering strings -// Used in HTML template -func (ht *HTMLReporter) RenderStr(what string) (string, error) { - var val string - var err error - switch what { - case "showClassStatus": - val = "show" - if ht.NoFailure() { - val = "" - } - case "textColorStatus": - val = "text-danger" - if ht.NoFailure() { - val = "text-success" - } - case "thumbsStatus": - val = "down" - if ht.NoFailure() { - val = "up" - } - case "msgStatus": - completionStatus := "Experiment completed." - if !ht.Completed() { - completionStatus = "Experiment has not completed." - } - failureStatus := "Experiment has failures." - if ht.NoFailure() { - failureStatus = "Experiment has no failures." 
- } - taskStatus := fmt.Sprintf("%v out of %v tasks are complete.", ht.Result.NumCompletedTasks, len(ht.Spec)) - loopStatus := fmt.Sprintf("%d loops have completed.", ht.Result.NumLoops) - val = fmt.Sprint(completionStatus) - val += " " - val += fmt.Sprint(failureStatus) - val += " " - val += fmt.Sprint(taskStatus) - val += " " - val += fmt.Sprint(loopStatus) - default: - err = fmt.Errorf("do not know how to render %v", what) - } - return val, err -} - -// MetricDescriptionHTML is used to described metrics in the metrics and SLO section of the HTML report -func (ht *HTMLReporter) MetricDescriptionHTML(metricName string) (string, error) { - in := ht.Result.Insights - nm, err := base.NormalizeMetricName(metricName) - if err != nil { - return "", err - } - - m, err := in.GetMetricsInfo(nm) - if err != nil { - e := fmt.Errorf("unable to get metrics info for %v", nm) - log.Logger.Error(e) - return "", e - } - return m.Description, nil -} - -// renderSLOSatisfiedHTML provides the HTML icon indicating if the SLO is satisfied -func renderSLOSatisfiedHTML(s bool) string { - if s { - return "fa-check-circle" - } - return "fa-times-circle" -} - -// renderSLOSatisfiedCellClass dictates the cell color indicating if the SLO is satisfied -func renderSLOSatisfiedCellClass(s bool) string { - if s { - return "text-success" - } - return "text-danger" -} - -// SortedVectorMetrics extracts vector metric names from experiment in sorted order -func (ht *HTMLReporter) SortedVectorMetrics() []string { - keys := []string{} - for k, mm := range ht.Result.Insights.MetricsInfo { - if mm.Type == base.HistogramMetricType || mm.Type == base.SampleMetricType { - keys = append(keys, k) - } - } - sort.Strings(keys) - return keys -} - -// sampleHist samples values from a histogram -func sampleHist(h []base.HistBucket) []float64 { - vals := []float64{} - for _, b := range h { - for i := 0; i < int(b.Count); i++ { - /* #nosec */ - vals = append(vals, b.Lower+(b.Upper-b.Lower)*rand.Float64()) - } - } - return vals -} - -// VectorMetricValue gets the value of the given vector metric for the given version -// If it is a histogram metric, then its values are sampled from the histogram -// Recall: VectorMetric can be a histogram metric or a sample metric. 
-func (ht *HTMLReporter) VectorMetricValue(i int, m string) []float64 { - in := ht.Result.Insights - mm, ok := in.MetricsInfo[m] - if !ok { - log.Logger.Error("could not find vector metric: ", m) - return nil - } - if mm.Type == base.SampleMetricType { - return in.NonHistMetricValues[i][m] - } - // this is a hist metric - return sampleHist(in.HistMetricValues[i][m]) -} diff --git a/action/report/report_test.go b/action/report/report_test.go deleted file mode 100644 index 90bd6e9d1..000000000 --- a/action/report/report_test.go +++ /dev/null @@ -1,65 +0,0 @@ -package report - -import ( - "os" - "testing" - - "github.com/iter8-tools/iter8/base" - "github.com/iter8-tools/iter8/driver" - "github.com/stretchr/testify/assert" -) - -func TestReportText(t *testing.T) { - _ = os.Chdir(t.TempDir()) - _ = copyFileToPwd(t, base.CompletePath("../../", "testdata/assertinputs/experiment.yaml")) - - fd := driver.FileDriver{ - RunDir: ".", - } - exp, err := base.BuildExperiment(&fd) - assert.NoError(t, err) - reporter := TextReporter{ - Reporter: &Reporter{ - Experiment: exp, - }, - } - err = reporter.Gen(os.Stdout) - assert.NoError(t, err) -} - -func TestReportTextWithLowerSLOs(t *testing.T) { - _ = os.Chdir(t.TempDir()) - _ = copyFileToPwd(t, base.CompletePath("../../", "testdata/assertinputs/experimentWithLowerSLOs.yaml")) - _ = os.Rename("experimentWithLowerSLOs.yaml", "experiment.yaml") - - fd := driver.FileDriver{ - RunDir: ".", - } - exp, err := base.BuildExperiment(&fd) - assert.NoError(t, err) - reporter := TextReporter{ - Reporter: &Reporter{ - Experiment: exp, - }, - } - err = reporter.Gen(os.Stdout) - assert.NoError(t, err) -} - -func TestReportHTMLWithLowerSLOs(t *testing.T) { - _ = os.Chdir(t.TempDir()) - _ = copyFileToPwd(t, base.CompletePath("../../", "testdata/assertinputs/experimentWithLowerSLOs.yaml")) - _ = os.Rename("experimentWithLowerSLOs.yaml", "experiment.yaml") - fd := driver.FileDriver{ - RunDir: ".", - } - exp, err := base.BuildExperiment(&fd) - assert.NoError(t, err) - reporter := HTMLReporter{ - Reporter: &Reporter{ - Experiment: exp, - }, - } - err = reporter.Gen(os.Stdout) - assert.NoError(t, err) -} diff --git a/action/report/test_helpers.go b/action/report/test_helpers.go deleted file mode 100644 index cdd1bd555..000000000 --- a/action/report/test_helpers.go +++ /dev/null @@ -1,32 +0,0 @@ -package report - -import ( - "errors" - "io" - "os" - "path/filepath" - "testing" -) - -// copyFileToPwd copies the specified file to pwd -func copyFileToPwd(t *testing.T, filePath string) error { - // get file - srcFile, err := os.Open(filepath.Clean(filePath)) - if err != nil { - return errors.New("could not open metrics file") - } - t.Cleanup(func() { - _ = srcFile.Close() - }) - - // create copy of file in pwd - destFile, err := os.Create(filepath.Base(filePath)) - if err != nil { - return errors.New("could not create copy of metrics file in temp directory") - } - t.Cleanup(func() { - _ = destFile.Close() - }) - _, _ = io.Copy(destFile, srcFile) - return nil -} diff --git a/action/report/textreport.tpl b/action/report/textreport.tpl deleted file mode 100644 index 5410455dc..000000000 --- a/action/report/textreport.tpl +++ /dev/null @@ -1,30 +0,0 @@ - -Experiment summary: -******************* - - Experiment completed: {{ .Completed }} - No task failures: {{ .NoFailure }} - Total number of tasks: {{ len .Spec }} - Number of completed tasks: {{ .Result.NumCompletedTasks }} - Number of completed loops: {{ .Result.NumLoops }} - -{{- if .Result.Insights }} -{{- if not (empty 
.Result.Insights.SLOs) }} - -Whether or not service level objectives (SLOs) are satisfied: -************************************************************* - -{{ .PrintSLOsText | indent 2 }} -{{- end }} - -Latest observed values for metrics: -*********************************** - -{{ .PrintMetricsText | indent 2 }} -{{- else }} - -Metrics-based Insights: -*********************** - - Insights not found in experiment results. You may need to retry this report at a later time. -{{- end }} diff --git a/action/report/textreporter.go b/action/report/textreporter.go deleted file mode 100644 index 1a6e9d9a4..000000000 --- a/action/report/textreporter.go +++ /dev/null @@ -1,195 +0,0 @@ -package report - -import ( - "bytes" - "errors" - "fmt" - "io" - "text/tabwriter" - textT "text/template" - - _ "embed" - - "github.com/Masterminds/sprig" - "github.com/iter8-tools/iter8/base" - "github.com/iter8-tools/iter8/base/log" -) - -// TextReporter supports generation of text reports from experiments. -type TextReporter struct { - // Reporter is embedded and enables access to all reporter data and methods - *Reporter -} - -// reportText is the text report template -// -//go:embed textreport.tpl -var reportText string - -// Gen writes the text report for a given experiment into the given writer -func (tr *TextReporter) Gen(out io.Writer) error { - // create text template - ttpl, err := textT.New("report").Option("missingkey=error").Funcs(sprig.TxtFuncMap()).Parse(reportText) - if err != nil { - e := errors.New("unable to parse text template") - log.Logger.WithStackTrace(err.Error()).Error(e) - return e - } - - var b bytes.Buffer - if err = ttpl.Execute(&b, tr); err != nil { - e := errors.New("unable to execute template") - log.Logger.WithStackTrace(err.Error()).Error(e) - return e - } - - // print output - fmt.Fprintln(out, b.String()) - return nil -} - -// PrintSLOsText returns SLOs section of the text report as a string -func (tr *TextReporter) PrintSLOsText() string { - var b bytes.Buffer - w := tabwriter.NewWriter(&b, 0, 0, 1, ' ', tabwriter.Debug) - tr.printSLOsText(w) - return b.String() -} - -// getSLOStrText gets the text for an SLO -func (tr *TextReporter) getSLOStrText(i int, upper bool) (string, error) { - in := tr.Result.Insights - var slo base.SLO - if upper { - slo = in.SLOs.Upper[i] - } else { - slo = in.SLOs.Lower[i] - } - // get metric with units and description - str, err := tr.MetricWithUnits(slo.Metric) - if err != nil { - log.Logger.Error("unable to get slo metric with units") - return "", err - } - // add upper limit - if upper { - str = fmt.Sprintf("%v <= %v", str, slo.Limit) - } else { - // add lower limit - str = fmt.Sprintf("%v <= %v", slo.Limit, str) - } - return str, nil -} - -func (tr *TextReporter) printVersions(w *tabwriter.Writer) { - in := tr.Result.Insights - for i := 0; i < in.NumVersions; i++ { - fmt.Fprintf(w, "\t %s", in.TrackVersionStr(i)) - } -} - -// printSLOsText prints all SLOs into tab writer -func (tr *TextReporter) printSLOsText(w *tabwriter.Writer) { - in := tr.Result.Insights - fmt.Fprint(w, "SLO Conditions") - if in.NumVersions > 1 { - tr.printVersions(w) - } else { - fmt.Fprintf(w, "\t Satisfied") - } - fmt.Fprintln(w) - fmt.Fprint(w, "--------------") - for i := 0; i < in.NumVersions; i++ { - fmt.Fprint(w, "\t ---------") - } - fmt.Fprintln(w) - - if in.SLOs != nil { - log.Logger.Debug("SLOs are not nil") - log.Logger.Debug("found ", len(in.SLOs.Upper), " upper SLOs") - for i := 0; i < len(in.SLOs.Upper); i++ { - log.Logger.Debug("Upper SLO ", i) - str, err := 
tr.getSLOStrText(i, true) - if err == nil { - fmt.Fprint(w, str) - for j := 0; j < in.NumVersions; j++ { - fmt.Fprintf(w, "\t %v", in.SLOsSatisfied.Upper[i][j]) - } - fmt.Fprintln(w) - } else { - log.Logger.Error("unable to extract SLO text") - } - } - - log.Logger.Debug("found ", len(in.SLOs.Lower), " lower SLOs") - for i := 0; i < len(in.SLOs.Lower); i++ { - log.Logger.Debug("Lower SLO ", i) - str, err := tr.getSLOStrText(i, false) - if err == nil { - fmt.Fprint(w, str) - for j := 0; j < in.NumVersions; j++ { - fmt.Fprintf(w, "\t %v", in.SLOsSatisfied.Lower[i][j]) - } - fmt.Fprintln(w) - } else { - log.Logger.Error("unable to extract SLO text") - } - } - } - - _ = w.Flush() -} - -// PrintMetricsText returns metrics section of the text report as a string -func (tr *TextReporter) PrintMetricsText() string { - var b bytes.Buffer - w := tabwriter.NewWriter(&b, 0, 0, 1, ' ', tabwriter.Debug) - tr.printMetricsText(w) - return b.String() -} - -// printMetricsText prints metrics into tab writer -func (tr *TextReporter) printMetricsText(w *tabwriter.Writer) { - in := tr.Result.Insights - fmt.Fprint(w, "Metric") - if in.NumVersions > 1 { - tr.printVersions(w) - if in.Rewards != nil { - fmt.Fprintf(w, "\t Best") - } - } else { - fmt.Fprintf(w, "\t value") - } - fmt.Fprintln(w) - fmt.Fprint(w, "-------") - for i := 0; i < in.NumVersions; i++ { - fmt.Fprint(w, "\t -----") - } - if in.NumVersions > 1 && in.Rewards != nil { - fmt.Fprint(w, "\t ----") - } - fmt.Fprintln(w) - - // keys contain normalized scalar metric names in sorted order - keys := tr.SortedScalarAndSLOMetrics() - bestVersions := tr.GetBestVersions(keys, in) - - for i, mn := range keys { - mwu, err := tr.MetricWithUnits(mn) - if err == nil { - // add metric name with units - fmt.Fprint(w, mwu) - // add value - for j := 0; j < in.NumVersions; j++ { - fmt.Fprintf(w, "\t %v", tr.ScalarMetricValueStr(j, mn)) - } - if in.NumVersions > 1 && in.Rewards != nil { - fmt.Fprintf(w, "\t %s", bestVersions[i]) - } - fmt.Fprintln(w) - } else { - log.Logger.Error(err) - } - } - _ = w.Flush() -} diff --git a/action/report_test.go b/action/report_test.go deleted file mode 100644 index fe2a15f9c..000000000 --- a/action/report_test.go +++ /dev/null @@ -1,70 +0,0 @@ -package action - -import ( - "context" - "os" - "testing" - - "github.com/iter8-tools/iter8/base" - "github.com/iter8-tools/iter8/driver" - "github.com/stretchr/testify/assert" - "helm.sh/helm/v3/pkg/cli" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -func TestKubeReportText(t *testing.T) { - _ = os.Chdir(t.TempDir()) - // fix rOpts - rOpts := NewReportOpts(driver.NewFakeKubeDriver(cli.New())) - - byteArray, _ := os.ReadFile(base.CompletePath("../testdata/assertinputs", driver.ExperimentPath)) - _, _ = rOpts.Clientset.CoreV1().Secrets("default").Create(context.TODO(), &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default", - Namespace: "default", - }, - StringData: map[string]string{driver.ExperimentPath: string(byteArray)}, - }, metav1.CreateOptions{}) - - err := rOpts.KubeRun(os.Stdout) - assert.NoError(t, err) -} - -func TestKubeReportHTML(t *testing.T) { - _ = os.Chdir(t.TempDir()) - // fix rOpts - rOpts := NewReportOpts(driver.NewFakeKubeDriver(cli.New())) - rOpts.OutputFormat = HTMLOutputFormatKey - - byteArray, _ := os.ReadFile(base.CompletePath("../testdata/assertinputs", driver.ExperimentPath)) - _, _ = rOpts.Clientset.CoreV1().Secrets("default").Create(context.TODO(), &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default", - 
Namespace: "default", - }, - StringData: map[string]string{driver.ExperimentPath: string(byteArray)}, - }, metav1.CreateOptions{}) - - err := rOpts.KubeRun(os.Stdout) - assert.NoError(t, err) -} - -func TestKubeReportInvalid(t *testing.T) { - _ = os.Chdir(t.TempDir()) - // fix rOpts - rOpts := NewReportOpts(driver.NewFakeKubeDriver(cli.New())) - rOpts.OutputFormat = "invalid" - - byteArray, _ := os.ReadFile(base.CompletePath("../testdata/assertinputs", driver.ExperimentPath)) - _, _ = rOpts.Clientset.CoreV1().Secrets("default").Create(context.TODO(), &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default", - Namespace: "default", - }, - StringData: map[string]string{driver.ExperimentPath: string(byteArray)}, - }, metav1.CreateOptions{}) - - err := rOpts.KubeRun(os.Stdout) - assert.ErrorContains(t, err, "unsupported report format") -} diff --git a/action/run.go b/action/run.go index c9e2a7df5..17dc6b389 100644 --- a/action/run.go +++ b/action/run.go @@ -32,5 +32,6 @@ func (rOpts *RunOpts) KubeRun() error { if err := rOpts.KubeDriver.InitKube(); err != nil { return err } - return base.RunExperiment(rOpts.ReuseResult, rOpts.KubeDriver) + + return base.RunExperiment(rOpts.KubeDriver) } diff --git a/action/run_test.go b/action/run_test.go index 96d8f7f69..e6da28b44 100644 --- a/action/run_test.go +++ b/action/run_test.go @@ -2,7 +2,10 @@ package action import ( "context" + "encoding/json" "fmt" + "io" + "net/http" "os" "testing" @@ -15,8 +18,16 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +const ( + myName = "myName" + myNamespace = "myNamespace" +) + func TestKubeRun(t *testing.T) { - _ = os.Chdir(t.TempDir()) + // define METRICS_SERVER_URL + metricsServerURL := "http://iter8.default:8080" + err := os.Setenv(base.MetricsServerURL, metricsServerURL) + assert.NoError(t, err) // create and configure HTTP endpoint for testing mux, addr := fhttp.DynamicHTTPServer(false) @@ -24,6 +35,36 @@ func TestKubeRun(t *testing.T) { var verifyHandlerCalled bool mux.HandleFunc("/get", base.GetTrackingHandler(&verifyHandlerCalled)) + // mock metrics server + base.StartHTTPMock(t) + metricsServerCalled := false + base.MockMetricsServer(base.MockMetricsServerInput{ + MetricsServerURL: metricsServerURL, + ExperimentResultCallback: func(req *http.Request) { + metricsServerCalled = true + + // check query parameters + assert.Equal(t, myName, req.URL.Query().Get("experiment")) + assert.Equal(t, myNamespace, req.URL.Query().Get("namespace")) + + // check payload + body, err := io.ReadAll(req.Body) + assert.NoError(t, err) + assert.NotNil(t, body) + + // check payload content + bodyExperimentResult := base.ExperimentResult{} + err = json.Unmarshal(body, &bodyExperimentResult) + assert.NoError(t, err) + assert.NotNil(t, body) + + // no experiment failure + assert.False(t, bodyExperimentResult.Failure) + }, + }) + + _ = os.Chdir(t.TempDir()) + // create experiment.yaml base.CreateExperimentYaml(t, base.CompletePath("../testdata", "experiment.tpl"), url, driver.ExperimentPath) @@ -40,16 +81,9 @@ func TestKubeRun(t *testing.T) { StringData: map[string]string{driver.ExperimentPath: string(byteArray)}, }, metav1.CreateOptions{}) - err := rOpts.KubeRun() + err = rOpts.KubeRun() assert.NoError(t, err) // sanity check -- handler was called assert.True(t, verifyHandlerCalled) - - // check results - exp, err := base.BuildExperiment(rOpts.KubeDriver) - assert.NoError(t, err) - assert.True(t, exp.Completed()) - assert.True(t, exp.NoFailure()) - assert.True(t, exp.SLOs()) - assert.Equal(t, 4, 
exp.Result.NumCompletedTasks) + assert.True(t, metricsServerCalled) } diff --git a/autox/application.tpl b/autox/application.tpl deleted file mode 100644 index 97a1b6f1c..000000000 --- a/autox/application.tpl +++ /dev/null @@ -1,39 +0,0 @@ -apiVersion: argoproj.io/v1alpha1 -kind: Application -metadata: - name: {{ .Name }} - namespace: argocd - ownerReferences: - - apiVersion: v1 - kind: Secret - name: {{ .Owner.Name }} - uid: {{ .Owner.UID }} - finalizers: - - resources-finalizer.argocd.argoproj.io - labels: - app.kubernetes.io/managed-by: iter8 -spec: - destination: - namespace: {{ .Namespace }} - server: https://kubernetes.default.svc - project: default - source: - chart: {{ .Chart.Name }} - helm: - values: | - {{ .Chart.Values | toYaml | indent 8 | trim }} - repoURL: https://iter8-tools.github.io/iter8 - targetRevision: {{ .Chart.Version }} - ignoreDifferences: - - kind: Secret - name: {{ .Name }} - namespace: {{ .Namespace }} - jsonPointers: - - /data - - /metadata - syncPolicy: - automated: - selfHeal: true - syncOptions: - - CreateNamespace=true - - RespectIgnoreDifferences=true \ No newline at end of file diff --git a/autox/config.go b/autox/config.go deleted file mode 100644 index 6073ae76d..000000000 --- a/autox/config.go +++ /dev/null @@ -1,74 +0,0 @@ -package autox - -// config.go - reading of configuration (list of resources/namespaces to watch) - -import ( - "os" - "path/filepath" - - "github.com/iter8-tools/iter8/base/log" - - "sigs.k8s.io/yaml" -) - -// trigger specifies a Kubernetes resource object. When this Kubernetes resource object is created/updated/deleted, then the releaseGroupSpecs will be created/deleted. -type trigger struct { - Name string `json:"name,omitempty" yaml:"name,omitempty"` - - Namespace string `json:"namespace,omitempty" yaml:"namespace,omitempty"` - - Group string `json:"group,omitempty" yaml:"group,omitempty"` - - Version string `json:"version,omitempty" yaml:"version,omitempty"` - - Resource string `json:"resource,omitempty" yaml:"resource,omitempty"` -} - -// releaseSpec points to a particular Helm releaseSpec -type releaseSpec struct { - // Name is the name of the Helm chart - Name string `json:"name" yaml:"name"` - - // Values is the values of the Helm chart - Values map[string]interface{} `json:"values" yaml:"values"` - - // Version is the version of the Helm chart - Version string `json:"version" yaml:"version"` -} - -// releaseGroupSpec is the configuration of all the Helm charts for a particular experiment group and their install trigger -type releaseGroupSpec struct { - // Trigger defines when the ReleaseSpecs should be installed - Trigger trigger `json:"trigger" yaml:"trigger"` - - // ReleaseSpecs is the set of Helm charts - // the keys in ReleaseSpecs are identifiers for each releaseSpec (releaseSpecID) - ReleaseSpecs map[string]releaseSpec `json:"releaseSpecs" yaml:"releaseSpecs"` -} - -// config is the configuration for all the Helm charts and their triggers -type config struct { - // Specs contains the releaseGroupSpecs, which contain the Helm charts and their triggers - // the keys in Specs are identifiers for each releaseGroupSpec (releaseGroupSpecID) - Specs map[string]releaseGroupSpec -} - -// readConfig reads YAML autoX config file and converts to a config object -func readConfig(fn string) (c config) { - // empty configuration - c = config{} - - yfile, err := os.ReadFile(filepath.Clean(fn)) - if err != nil { - log.Logger.Warnf("unable to read configuration file %s: %s", fn, err.Error()) - return c // empty configuration - } - - 
err = yaml.Unmarshal(yfile, &c) - if err != nil { - log.Logger.Warnf("invalid configuration file %s: %s", fn, err.Error()) - return c // empty configuration - } - - return c -} diff --git a/autox/config_test.go b/autox/config_test.go deleted file mode 100644 index a83bc5d42..000000000 --- a/autox/config_test.go +++ /dev/null @@ -1,38 +0,0 @@ -package autox - -import ( - "path/filepath" - "runtime" - "testing" - - "github.com/stretchr/testify/assert" -) - -// utility method -func completePath(prefix string, suffix string) string { - _, filename, _, _ := runtime.Caller(1) // one step up the call stack - return filepath.Join(filepath.Dir(filename), prefix, suffix) -} - -func TestReadConfig(t *testing.T) { - for _, tt := range []struct { - name string - file string - numSpecGroups int - }{ - {"empty", "config.empty.yaml", 0}, - {"invalid", "config.invalid.yaml", 0}, - {"garbage", "config.garbage.yaml", 0}, - {"nofile", "config.nofile.yaml", 0}, - } { - t.Run(tt.name, func(t *testing.T) { - c := readConfig(completePath("../testdata/autox_inputs", tt.file)) - assert.Equal(t, tt.numSpecGroups, len(c.Specs)) - }) - } - - c := readConfig(completePath("../testdata/autox_inputs", "config.example.yaml")) - assert.Equal(t, 2, len(c.Specs)) - assert.Equal(t, 2, len(c.Specs["myApp"].ReleaseSpecs)) - assert.Equal(t, 1, len(c.Specs["myApp2"].ReleaseSpecs)) -} diff --git a/autox/doc.go b/autox/doc.go deleted file mode 100644 index bbdb56e51..000000000 --- a/autox/doc.go +++ /dev/null @@ -1,2 +0,0 @@ -// Package autox is the entry point for the autoX controller. -package autox diff --git a/autox/informer.go b/autox/informer.go deleted file mode 100644 index b3745299b..000000000 --- a/autox/informer.go +++ /dev/null @@ -1,451 +0,0 @@ -package autox - -// informer.go - informer(s) to watch desired resources/namespaces - -import ( - "bytes" - "context" - _ "embed" - "errors" - "fmt" - "reflect" - "sync" - "time" - - "github.com/iter8-tools/iter8/base" - "github.com/iter8-tools/iter8/base/log" - - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/client-go/dynamic/dynamicinformer" - "k8s.io/client-go/tools/cache" - "k8s.io/client-go/util/retry" - "sigs.k8s.io/yaml" - - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -const ( - // this label is used in secrets (to allow for ownership over the applications) - // label is set to the name of a release group spec (releaseGroupSpecName) - // there is a 1:1 mapping of secrets to release group specs - autoXGroupLabel = "iter8.tools/autox-group" - - iter8 = "iter8" - argocd = "argocd" - - autoXAdditionalValues = "autoXAdditionalValues" - - nameLabel = "app.kubernetes.io/name" - versionLabel = "app.kubernetes.io/version" - managedByLabel = "app.kubernetes.io/managed-by" - trackLabel = "iter8.tools/track" - - timeout = 15 * time.Second - interval = 1 * time.Second -) - -var applicationGVR = schema.GroupVersionResource{ - Group: "argoproj.io", - Version: "v1alpha1", - Resource: "applications", -} - -var applicationValuesPath = []string{"spec", "source", "helm", "values"} - -var m sync.Mutex - -//go:embed application.tpl -var tplStr string - -type chartAction int64 - -const ( - applyAction chartAction = 0 - deleteAction chartAction = 1 -) - -type owner struct { - Name string `json:"name" yaml:"name"` - UID string `json:"uid" yaml:"uid"` -} - -// applicationValues is the values for the (Argo CD) application template -type applicationValues struct 
{ - // Name is the name of the application - Name string `json:"name" yaml:"name"` - - // Namespace is the namespace of the application - Namespace string `json:"namespace" yaml:"namespace"` - - // Owner is the release group spec secret for this application - // we create an secret for each release group spec - // this secret is assigned as the Owner of this spec - // when we delete the secret, the application is also deleted - Owner owner `json:"owner" yaml:"owner"` - - // Chart is the Helm Chart for this application - Chart releaseSpec `json:"chart" yaml:"chart"` -} - -// the name of a release will depend on: -// -// the name of the release group spec (releaseGroupSpecName) -// the ID of the release spec (releaseSpecID) -func getReleaseName(releaseGroupSpecName string, releaseSpecID string) string { - return fmt.Sprintf("autox-%s-%s", releaseGroupSpecName, releaseSpecID) -} - -// shouldCreateApplication will return true if an application should be created -// an application should be created if there is no preexisting application or -// if the values are different from those from the previous application -func shouldCreateApplication(values map[string]interface{}, releaseName string) bool { - // get application - uPApp, _ := k8sClient.dynamicClient.Resource(applicationGVR).Namespace(argocd).Get(context.TODO(), releaseName, metav1.GetOptions{}) // *unstructured.Unstructured previous application - if uPApp != nil { - log.Logger.Debug(fmt.Sprintf("found previous application \"%s\"", releaseName)) - - // check if the previous application is managed by Iter8 - // (if it was previously created by Iter8) - if manager, ok := uPApp.GetLabels()[managedByLabel]; !ok || manager != iter8 { - log.Logger.Debug(fmt.Sprintf("previous application is not managed by Iter8 \"%s\"", releaseName)) - return false - } - - // extract values from previous application - pValuesString, _, err := unstructured.NestedString(uPApp.UnstructuredContent(), applicationValuesPath...) 
// pValuesString previous values - if err != nil { - log.Logger.Warn(fmt.Sprintf("cannot extract values of previous application \"%s\": %s: %s", releaseName, pValuesString, err)) - } - - var pValues map[string]interface{} - err = yaml.Unmarshal([]byte(pValuesString), &pValues) - if err != nil { - log.Logger.Warn(fmt.Sprintf("cannot parse values of previous application \"%s\": %s: %s", releaseName, pValuesString, err)) - } - - log.Logger.Debug(fmt.Sprintf("previous values: \"%s\"\nnew values: \"%s\"", pValues, values)) - - shouldCreateApplication := !reflect.DeepEqual(pValues, values) - if shouldCreateApplication { - log.Logger.Debug(fmt.Sprintf("replace previous application \"%s\"", releaseName)) - } else { - log.Logger.Debug(fmt.Sprintf("do not replace previous application \"%s\"", releaseName)) - } - - return shouldCreateApplication - } - - // there is no preexisting application, so should create one - return true -} - -func executeApplicationTemplate(applicationTemplate string, values applicationValues) (*unstructured.Unstructured, error) { - tpl, err := base.CreateTemplate(applicationTemplate) - if err != nil { - log.Logger.Error("could not create application template: ", err) - return nil, err - } - - var buf bytes.Buffer - err = tpl.Execute(&buf, values) - if err != nil { - log.Logger.Error("could not execute application template: ", err) - return nil, err - } - - jsonBytes, err := yaml.YAMLToJSON(buf.Bytes()) - if err != nil { - log.Logger.Error(fmt.Sprintf("could not convert YAML to JSON: \"%s\": \"%s\"", buf.String(), err)) - return nil, err - } - - // decode pending application into unstructured.UnstructuredJSONScheme - // source: https://github.com/kubernetes/client-go/blob/1ac8d459351e21458fd1041f41e43403eadcbdba/dynamic/simple.go#L186 - uncastObj, err := runtime.Decode(unstructured.UnstructuredJSONScheme, jsonBytes) - if err != nil { - log.Logger.Error(fmt.Sprintf("could not decode object into unstructured.UnstructuredJSONScheme: \"%s\": \"%s\"", buf.String(), err)) - return nil, err - } - - return uncastObj.(*unstructured.Unstructured), nil -} - -// applyApplication will apply an application based on a release spec -func applyApplication(releaseName string, releaseGroupSpecName string, releaseSpec releaseSpec, namespace string, additionalValues map[string]interface{}) error { - // get release group spec secret, based on autoX group label - // secret is assigned as the owner of the application - labelSelector := fmt.Sprintf("%s=%s", autoXGroupLabel, releaseGroupSpecName) - secretList, err := k8sClient.clientset.CoreV1().Secrets(argocd).List(context.TODO(), metav1.ListOptions{ - LabelSelector: labelSelector, - }) - if err != nil { - log.Logger.Error("could not list release group spec secrets: ", err) - return err - } - - // ensure that only one secret is found - if secretsLen := len(secretList.Items); secretsLen == 0 { - err = errors.New("expected release group spec secret with label selector" + labelSelector + "but none were found") - log.Logger.Error(err) - return err - } else if secretsLen > 1 { - err = errors.New("expected release group spec secret with label selector" + labelSelector + "but more than one were found") - log.Logger.Error(err) - return err - } - secret := secretList.Items[0] - - values := applicationValues{ // template values - Name: releaseName, - Namespace: namespace, - - Owner: owner{ - Name: secret.Name, - UID: string(secret.GetUID()), // assign the release group spec secret as the owner of the application - }, - - Chart: releaseSpec, - } - - // add 
-
-func executeApplicationTemplate(applicationTemplate string, values applicationValues) (*unstructured.Unstructured, error) {
-	tpl, err := base.CreateTemplate(applicationTemplate)
-	if err != nil {
-		log.Logger.Error("could not create application template: ", err)
-		return nil, err
-	}
-
-	var buf bytes.Buffer
-	err = tpl.Execute(&buf, values)
-	if err != nil {
-		log.Logger.Error("could not execute application template: ", err)
-		return nil, err
-	}
-
-	jsonBytes, err := yaml.YAMLToJSON(buf.Bytes())
-	if err != nil {
-		log.Logger.Error(fmt.Sprintf("could not convert YAML to JSON: \"%s\": \"%s\"", buf.String(), err))
-		return nil, err
-	}
-
-	// decode pending application into unstructured.UnstructuredJSONScheme
-	// source: https://github.com/kubernetes/client-go/blob/1ac8d459351e21458fd1041f41e43403eadcbdba/dynamic/simple.go#L186
-	uncastObj, err := runtime.Decode(unstructured.UnstructuredJSONScheme, jsonBytes)
-	if err != nil {
-		log.Logger.Error(fmt.Sprintf("could not decode object into unstructured.UnstructuredJSONScheme: \"%s\": \"%s\"", buf.String(), err))
-		return nil, err
-	}
-
-	return uncastObj.(*unstructured.Unstructured), nil
-}
-
-// applyApplication will apply an application based on a release spec
-func applyApplication(releaseName string, releaseGroupSpecName string, releaseSpec releaseSpec, namespace string, additionalValues map[string]interface{}) error {
-	// get release group spec secret, based on autoX group label
-	// secret is assigned as the owner of the application
-	labelSelector := fmt.Sprintf("%s=%s", autoXGroupLabel, releaseGroupSpecName)
-	secretList, err := k8sClient.clientset.CoreV1().Secrets(argocd).List(context.TODO(), metav1.ListOptions{
-		LabelSelector: labelSelector,
-	})
-	if err != nil {
-		log.Logger.Error("could not list release group spec secrets: ", err)
-		return err
-	}
-
-	// ensure that exactly one secret is found
-	if secretsLen := len(secretList.Items); secretsLen == 0 {
-		err = errors.New("expected release group spec secret with label selector " + labelSelector + " but none were found")
-		log.Logger.Error(err)
-		return err
-	} else if secretsLen > 1 {
-		err = errors.New("expected release group spec secret with label selector " + labelSelector + " but more than one was found")
-		log.Logger.Error(err)
-		return err
-	}
-	secret := secretList.Items[0]
-
-	values := applicationValues{ // template values
-		Name:      releaseName,
-		Namespace: namespace,
-
-		Owner: owner{
-			Name: secret.Name,
-			UID:  string(secret.GetUID()), // assign the release group spec secret as the owner of the application
-		},
-
-		Chart: releaseSpec,
-	}
-
-	// add additionalValues to the values
-	// Argo CD will create a new experiment if it sees that the additionalValues are different from the previous experiment
-	// additionalValues will contain the pruned labels from the Kubernetes object
-	if values.Chart.Values == nil {
-		values.Chart.Values = map[string]interface{}{}
-	}
-	values.Chart.Values[autoXAdditionalValues] = additionalValues
-
-	// check if the pending application will be different from the previous application, if it exists
-	// only create a new application if it will be different (the values will be different)
-	if s := shouldCreateApplication(values.Chart.Values, releaseName); s {
-		// delete previous application if it exists
-		uPApp, _ := k8sClient.dynamicClient.Resource(applicationGVR).Namespace(argocd).Get(context.TODO(), releaseName, metav1.GetOptions{}) // *unstructured.Unstructured previous application
-		if uPApp != nil {
-			if err1 := deleteApplication(releaseName); err1 != nil {
-				log.Logger.Error(fmt.Sprintf("could not delete previous application: \"%s\": \"%s\"", releaseName, err1))
-			}
-		}
-
-		// execute application template
-		uApp, err := executeApplicationTemplate(tplStr, values)
-		if err != nil {
-			return err
-		}
-
-		// apply application to the K8s cluster
-		log.Logger.Debug(fmt.Sprintf("apply application \"%s\"", releaseName))
-		err = retry.OnError(
-			wait.Backoff{
-				Steps:    int(timeout / interval),
-				Cap:      timeout,
-				Duration: interval,
-				Factor:   1.0,
-				Jitter:   0.1,
-			},
-			func(err error) bool {
-				log.Logger.Error(err)
-				return true
-			}, // retry on all failures
-			func() error {
-				_, err = k8sClient.dynamic().Resource(applicationGVR).Namespace(argocd).Create(context.TODO(), uApp, metav1.CreateOptions{})
-				return err
-			},
-		)
-		if err != nil {
-			log.Logger.Error(fmt.Sprintf("could not create application: \"%s\": \"%s\"", releaseName, err))
-			return err
-		}
-	}
-
-	return nil
-}
-
-// deleteApplication deletes an application based on a given release name
-func deleteApplication(releaseName string) error {
-	log.Logger.Debug(fmt.Sprintf("delete application \"%s\"", releaseName))
-
-	err := k8sClient.dynamic().Resource(applicationGVR).Namespace(argocd).Delete(context.TODO(), releaseName, metav1.DeleteOptions{})
-	if err != nil {
-		log.Logger.Error(fmt.Sprintf("could not delete application \"%s\": \"%s\"", releaseName, err))
-		return err
-	}
-
-	return nil
-}
-
-// doChartAction iterates through a release group spec and performs the apply/delete action for each release spec
-// action can be apply or delete
-func doChartAction(chartAction chartAction, releaseGroupSpecName string, releaseGroupSpec releaseGroupSpec, namespace string, additionalValues map[string]interface{}) error {
-	// get group
-	var err error
-	for releaseSpecID, releaseSpec := range releaseGroupSpec.ReleaseSpecs {
-		// get release name
-		releaseName := getReleaseName(releaseGroupSpecName, releaseSpecID)
-
-		// perform action for this release
-		switch chartAction {
-		case applyAction:
-			// if there is an error, keep going forward in the for loop
-			if err1 := applyApplication(releaseName, releaseGroupSpecName, releaseSpec, namespace, additionalValues); err1 != nil {
-				err = errors.New("one or more Helm release applications failed")
-			}
-
-		case deleteAction:
-			// if there is an error, keep going forward in the for loop
-			if err1 := deleteApplication(releaseName); err1 != nil {
-				err = errors.New("one or more Helm release deletions failed")
-			}
-		}
-	}
-
-	if err != nil {
-		log.Logger.Error(err)
-	}
-
-	return err
-}
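// With Factor set to 1.0, the Backoff used in applyApplication degenerates to
// fixed-interval polling: roughly timeout/interval attempts, interval apart,
// plus up to 10% jitter. A minimal sketch of the same pattern, assuming
// illustrative values for timeout and interval (the real constants are
// defined elsewhere in this package):
//
//	backoff := wait.Backoff{
//		Steps:    int(5 * time.Minute / (5 * time.Second)), // ~60 attempts
//		Cap:      5 * time.Minute,
//		Duration: 5 * time.Second,
//		Factor:   1.0,
//		Jitter:   0.1,
//	}
//	_ = retry.OnError(backoff,
//		func(err error) bool { return true }, // retry on all errors
//		func() error { return createApplication() }, // hypothetical operation
//	)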
-
-// pruneLabels will extract the labels that are relevant for autoX
-// currently, the important labels are:
-//
-// nameLabel = "app.kubernetes.io/name"
-// versionLabel = "app.kubernetes.io/version"
-// trackLabel = "iter8.tools/track"
-func pruneLabels(labels map[string]string) map[string]interface{} {
-	prunedLabels := map[string]interface{}{}
-	for _, l := range []string{nameLabel, versionLabel, trackLabel} {
-		prunedLabels[l] = labels[l]
-	}
-	return prunedLabels
-}
-
-// hasVersionLabel checks if the version label is present
-func hasVersionLabel(labels map[string]string) bool {
-	version, ok := labels[versionLabel]
-	return ok && version != ""
-}
-
-// handle is the entry point to all (add, update, delete) event handlers
-func handle(obj interface{}, releaseGroupSpecName string, releaseGroupSpec releaseGroupSpec) {
-	m.Lock()
-	defer m.Unlock()
-
-	// parse object
-	u := obj.(*unstructured.Unstructured)
-
-	// check if name matches trigger
-	name := u.GetName()
-	if name != releaseGroupSpec.Trigger.Name {
-		return
-	}
-
-	// at this point, we know that we are really handling an event for the trigger object
-	// name, namespace, and GVR should all match
-	log.Logger.Debug(fmt.Sprintf("handle kubernetes resource object: name: \"%s\", namespace: \"%s\", kind: \"%s\", labels: \"%s\"", u.GetName(), u.GetNamespace(), u.GetKind(), u.GetLabels()))
-
-	// namespace and GVR should already match trigger
-	ns := u.GetNamespace()
-	// Note: GVR is from the release group spec, not available through the obj
-	gvr := getGVR(releaseGroupSpec)
-
-	// get (client) object from cluster
-	clientU, _ := k8sClient.dynamicClient.Resource(gvr).Namespace(ns).Get(context.TODO(), name, metav1.GetOptions{})
-
-	// if the (client) object exists:
-	// delete applications if it does not have the version label, and
-	// apply applications if it does
-	if clientU != nil {
-		// check if version label exists
-		clientLabels := clientU.GetLabels()
-		if !hasVersionLabel(clientLabels) {
-			log.Logger.Debugf("delete applications for release group \"%s\" (no %s label)", releaseGroupSpecName, versionLabel)
-
-			_ = doChartAction(deleteAction, releaseGroupSpecName, releaseGroupSpec, "", nil)
-
-			// if version label does not exist, there is no need to apply applications, so return
-			return
-		}
-
-		// apply applications for the release group
-		clientPrunedLabels := pruneLabels(clientLabels)
-		_ = doChartAction(applyAction, releaseGroupSpecName, releaseGroupSpec, ns, clientPrunedLabels)
-	} else { // delete applications if (client) object does not exist
-		_ = doChartAction(deleteAction, releaseGroupSpecName, releaseGroupSpec, "", nil)
-	}
-}
-
-// getGVR constructs the GVR from a release group spec trigger
-func getGVR(releaseGroupSpec releaseGroupSpec) schema.GroupVersionResource {
-	gvr := schema.GroupVersionResource{
-		Group:    releaseGroupSpec.Trigger.Group,
-		Version:  releaseGroupSpec.Trigger.Version,
-		Resource: releaseGroupSpec.Trigger.Resource,
-	}
-
-	return gvr
-}
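// For instance, the Deployment trigger used throughout the tests below maps to:
//
//	gvr := schema.GroupVersionResource{
//		Group:    "apps",
//		Version:  "v1",
//		Resource: "deployments",
//	}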
-
-func addObject(releaseGroupSpecName string, releaseGroupSpec releaseGroupSpec) func(obj interface{}) {
-	return func(obj interface{}) {
-		handle(obj, releaseGroupSpecName, releaseGroupSpec)
-	}
-}
-
-func updateObject(releaseGroupSpecName string, releaseGroupSpec releaseGroupSpec) func(oldObj, obj interface{}) {
-	return func(oldObj, obj interface{}) {
-		handle(obj, releaseGroupSpecName, releaseGroupSpec)
-	}
-}
-
-func deleteObject(releaseGroupSpecName string, releaseGroupSpec releaseGroupSpec) func(obj interface{}) {
-	return func(obj interface{}) {
-		handle(obj, releaseGroupSpecName, releaseGroupSpec)
-	}
-}
-
-type iter8Watcher struct {
-	factories map[string]dynamicinformer.DynamicSharedInformerFactory
-}
-
-func newIter8Watcher(autoXConfig config) *iter8Watcher {
-	w := &iter8Watcher{
-		// the key is the name of the release group spec (releaseGroupSpecName)
-		factories: map[string]dynamicinformer.DynamicSharedInformerFactory{},
-	}
-
-	// create a factory for each trigger
-	// there is a 1:1 correspondence between each trigger and release group spec
-	// effectively, we are creating one factory per trigger
-	// the key to the factories map is the name of the release group spec (releaseGroupSpecName)
-	for releaseGroupSpecName, releaseGroupSpec := range autoXConfig.Specs {
-		releaseGroupSpecName := releaseGroupSpecName
-		releaseGroupSpec := releaseGroupSpec
-
-		ns := releaseGroupSpec.Trigger.Namespace
-		gvr := getGVR(releaseGroupSpec)
-
-		w.factories[releaseGroupSpecName] = dynamicinformer.NewFilteredDynamicSharedInformerFactory(k8sClient.dynamicClient, 0, ns, nil)
-
-		informer := w.factories[releaseGroupSpecName].ForResource(gvr)
-		_, err := informer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
-			AddFunc:    addObject(releaseGroupSpecName, releaseGroupSpec),
-			UpdateFunc: updateObject(releaseGroupSpecName, releaseGroupSpec),
-			DeleteFunc: deleteObject(releaseGroupSpecName, releaseGroupSpec),
-		})
-
-		if err != nil {
-			log.Logger.Error(fmt.Sprintf("cannot add event handler for namespace \"%s\" and GVR \"%s\": \"%s\"", ns, gvr, err))
-		}
-	}
-
-	return w
-}
-
-func (watcher *iter8Watcher) start(stopChannel chan struct{}) {
-	for _, f := range watcher.factories {
-		f.Start(stopChannel)
-	}
-}
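// A sketch of the watcher lifecycle as wired up in Start (see watcher.go below):
// one informer factory per trigger is created up front, and all factories are
// started against a shared stop channel; closing the channel stops every informer.
//
//	w := newIter8Watcher(config)
//	stopCh := make(chan struct{})
//	go w.start(stopCh)
//	// ... later ...
//	close(stopCh) // shut down all informers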
diff --git a/autox/informer_test.go b/autox/informer_test.go
deleted file mode 100644
index 946fff893..000000000
--- a/autox/informer_test.go
+++ /dev/null
@@ -1,366 +0,0 @@
-package autox
-
-import (
-	// abnapp "github.com/iter8-tools/iter8/abn/application"
-	// "github.com/iter8-tools/iter8/abn/k8sclient"
-
-	"context"
-	"fmt"
-	"testing"
-	"time"
-
-	"github.com/stretchr/testify/assert"
-	"helm.sh/helm/v3/pkg/cli"
-	v1 "k8s.io/api/core/v1"
-
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
-	"k8s.io/apimachinery/pkg/runtime/schema"
-)
-
-// TestShouldCreateApplication tests the function shouldCreateApplication(), which determines if an application should be
-// created/updated based on whether or not there is a preexisting one
-func TestShouldCreateApplication(t *testing.T) {
-	// 1) nothing in cluster
-	// therefore, return true (no concern for preexisting application)
-	k8sClient = newFakeKubeClient(cli.New())
-	assert.True(t, shouldCreateApplication(map[string]interface{}{}, "test"))
-
-	// 2) existing application, new application has the same values
-	// therefore, return false (not necessary to recreate application)
-	values := applicationValues{
-		Name:      "test",
-		Namespace: "default",
-		Chart: releaseSpec{
-			Values: map[string]interface{}{},
-		},
-	}
-
-	// simulating additional values
-	values.Chart.Values["hello"] = "world"
-
-	// execute application template
-	uApp, err := executeApplicationTemplate(tplStr, values)
-	assert.NoError(t, err)
-	_, err = k8sClient.dynamicClient.Resource(applicationGVR).Namespace(argocd).Create(context.Background(), uApp, metav1.CreateOptions{})
-	assert.NoError(t, err)
-
-	// same values (values.Chart.Values)
-	// therefore, return false
-	assert.False(t, shouldCreateApplication(values.Chart.Values, "test"))
-
-	// 3) existing application, new application has different values
-	// therefore, return true (old application can be replaced with new one)
-
-	// different values
-	// therefore, return true
-	assert.True(t, shouldCreateApplication(map[string]interface{}{"something": "different"}, "test"))
-
-	// 4) existing application but application is not managed by Iter8
-	// therefore, return false (Iter8 does not have permission to replace the old application)
-
-	// setting managed-by to something other than Iter8
-	uApp.SetLabels(map[string]string{
-		managedByLabel: "abc",
-	})
-
-	_, err = k8sClient.dynamicClient.Resource(applicationGVR).Namespace(argocd).Update(context.Background(), uApp, metav1.UpdateOptions{})
-	assert.NoError(t, err)
-
-	assert.False(t, shouldCreateApplication(map[string]interface{}{"something": "different"}, "test"))
-}
-
-// TestApplyApplication tests the function applyApplication(), which applies Argo CD applications
-func TestApplyApplication(t *testing.T) {
-	k8sClient = newFakeKubeClient(cli.New())
-
-	releaseGroupSpecName := "testReleaseGroupSpecName"
-	releaseSpecName := "testReleaseSpecName"
-	applicationName := fmt.Sprintf("autox-%s-%s", releaseGroupSpecName, releaseSpecName)
-	spec := releaseSpec{
-		Name:   applicationName,
-		Values: map[string]interface{}{},
-	}
-	additionalValues := map[string]interface{}{}
-
-	// 1) no release group spec secret
-	// therefore, fail
-	assert.Error(t, applyApplication(applicationName, releaseGroupSpecName, spec, "default", additionalValues))
-
-	// 2) create application with no conflicts
-	// create release group spec secret
-	// therefore, no fail
-	releaseGroupSpecSecret := v1.Secret{
-		ObjectMeta: metav1.ObjectMeta{
-			Name:      "test-secret",
-			Namespace: argocd,
-			Labels: map[string]string{
-				"iter8.tools/autox-group": releaseGroupSpecName,
-			},
-		},
-	}
-
-	_, err := k8sClient.clientset.CoreV1().Secrets(argocd).Create(context.Background(), &releaseGroupSpecSecret, metav1.CreateOptions{})
-	assert.NoError(t, err)
-
-	// ensure application does not exist
-	_, err = k8sClient.dynamicClient.Resource(applicationGVR).Namespace(argocd).Get(context.Background(), applicationName, metav1.GetOptions{})
-	assert.Error(t, err)
-
-	assert.NoError(t, applyApplication(applicationName, releaseGroupSpecName, spec, "default", additionalValues))
-
-	// ensure application exists
-	_, err = k8sClient.dynamicClient.Resource(applicationGVR).Namespace(argocd).Get(context.Background(), applicationName, metav1.GetOptions{})
-	assert.NoError(t, err)
-
-	// 3) create application with conflicts
-	// fallback is to do nothing
-	// therefore, no fail
-	assert.NoError(t, applyApplication(applicationName, releaseGroupSpecName, spec, "default", additionalValues))
-}
-
-// TestDeleteApplication tests the function deleteApplication(), which deletes Argo CD applications
-func TestDeleteApplication(t *testing.T) {
-	k8sClient = newFakeKubeClient(cli.New())
-
-	releaseGroupSpecName := "testReleaseGroupSpecName"
-	releaseSpecName := "testReleaseSpecName"
-	applicationName := fmt.Sprintf("autox-%s-%s", releaseGroupSpecName, releaseSpecName)
-
-	// 1) no application
-	// therefore, fail
-	assert.Error(t, deleteApplication(applicationName))
-
-	// 2) delete existing application
-	// therefore, no fail
-
-	// create application
-	values := applicationValues{
-		Name: applicationName,
-		Chart: releaseSpec{
-			Name:   applicationName,
-			Values: map[string]interface{}{},
-		},
-	}
-	uApp, err := executeApplicationTemplate(tplStr, values)
-	assert.NoError(t, err)
-	_, err =
k8sClient.dynamic().Resource(applicationGVR).Namespace(argocd).Create(context.TODO(), uApp, metav1.CreateOptions{}) - assert.NoError(t, err) - - // ensure there is an application - _, err = k8sClient.dynamicClient.Resource(applicationGVR).Namespace(argocd).Get(context.Background(), applicationName, metav1.GetOptions{}) - assert.NoError(t, err) - - assert.NoError(t, deleteApplication(applicationName)) - - // ensure there is no application anymore - _, err = k8sClient.dynamicClient.Resource(applicationGVR).Namespace(argocd).Get(context.Background(), applicationName, metav1.GetOptions{}) - assert.Error(t, err) -} - -// Check to see if add, update, delete handlers from the watcher are properly invoked -// after the watcher is created using newIter8Watcher() -func TestNewIter8Watcher(t *testing.T) { - // autoX needs the config - autoXConfig := readConfig("../testdata/autox_inputs/config.example.yaml") - - namespace := "default" - releaseSpecName := "myApp" - version := "v1" - track := "" - appName1 := "autox-myApp-name1" - appName2 := "autox-myApp-name2" - - gvr := schema.GroupVersionResource{ - Group: "apps", - Version: "v1", - Resource: "deployments", - } - - // define and start watcher - k8sClient = newFakeKubeClient(cli.New()) - - // create releaseSpec secret - releaseGroupSpecSecret := v1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-secret", - Namespace: argocd, - Labels: map[string]string{ - "iter8.tools/autox-group": releaseSpecName, - }, - }, - } - _, err := k8sClient.clientset.CoreV1().Secrets(argocd).Create(context.Background(), &releaseGroupSpecSecret, metav1.CreateOptions{}) - assert.NoError(t, err) - - w := newIter8Watcher(autoXConfig) - assert.NotNil(t, w) - done := make(chan struct{}) - defer close(done) - w.start(done) - - // 1) create object with random name and no version label - // no application should be created - objRandNameNoAutoXLabel, err := k8sClient.dynamic(). - Resource(gvr).Namespace(namespace). - Create( - context.TODO(), - newUnstructuredDeployment( - namespace, - "rand", // random name - "", // no version label - track, - map[string]string{}, - ), - metav1.CreateOptions{}, - ) - assert.NoError(t, err) - assert.NotNil(t, objRandNameNoAutoXLabel) - - // no applications - assert.Eventually(t, func() bool { - list, _ := k8sClient.dynamic().Resource(applicationGVR).Namespace(argocd).List(context.Background(), metav1.ListOptions{}) - return assert.Equal(t, len(list.Items), 0) - }, 5*time.Second, time.Second) - - // 2) create object with random name and version label - // no application should be created - objRandNameAutoXLabel, err := k8sClient.dynamic(). - Resource(gvr).Namespace(namespace). - Create( - context.TODO(), - newUnstructuredDeployment( - namespace, - "rand2", // random name - version, // version label - track, - map[string]string{}, - ), - metav1.CreateOptions{}, - ) - assert.NoError(t, err) - assert.NotNil(t, objRandNameAutoXLabel) - - // no applications - assert.Eventually(t, func() bool { - list, _ := k8sClient.dynamic().Resource(applicationGVR).Namespace(argocd).List(context.Background(), metav1.ListOptions{}) - return assert.Equal(t, len(list.Items), 0) - }, 5*time.Second, time.Second) - - // 3) create object with trigger name and no version label - // no application should be created - objNoAutoXLabel, err := k8sClient.dynamic(). - Resource(gvr).Namespace(namespace). 
- Create( - context.TODO(), - newUnstructuredDeployment( - namespace, - releaseSpecName, // trigger name - "", // no version label - track, - map[string]string{}), - metav1.CreateOptions{}, - ) - assert.NoError(t, err) - assert.NotNil(t, objNoAutoXLabel) - - // no applications - assert.Eventually(t, func() bool { - list, _ := k8sClient.dynamic().Resource(applicationGVR).Namespace(argocd).List(context.Background(), metav1.ListOptions{}) - return assert.Equal(t, len(list.Items), 0) - }, 5*time.Second, time.Second) - - // delete the object so we can recreate it with autoX label - err = k8sClient.dynamic().Resource(gvr).Namespace(namespace).Delete(context.TODO(), releaseSpecName, metav1.DeleteOptions{}) - assert.NoError(t, err) - - // 4) create object with trigger name with version label - // 2 applications should be created - // one for each release spec in the config - // autox-myapp-name1 and autox-myapp-name2 - createdObj, err := k8sClient.dynamic(). - Resource(gvr).Namespace(namespace). - Create( - context.TODO(), - newUnstructuredDeployment( - namespace, - releaseSpecName, // trigger name - version, // version label - track, - map[string]string{}, - ), - metav1.CreateOptions{}, - ) - assert.NoError(t, err) - assert.NotNil(t, createdObj) - - // 2 applications - assert.Eventually(t, func() bool { - list, _ := k8sClient.dynamic().Resource(applicationGVR).Namespace(argocd).List(context.Background(), metav1.ListOptions{}) - return assert.Equal(t, len(list.Items), 2) - }, 5*time.Second, time.Second) - - // check applications by name - assert.Eventually(t, func() bool { - app, _ := k8sClient.dynamic().Resource(applicationGVR).Namespace(argocd).Get(context.Background(), appName1, metav1.GetOptions{}) - return assert.NotNil(t, app) - }, 5*time.Second, time.Second) - assert.Eventually(t, func() bool { - app, _ := k8sClient.dynamic().Resource(applicationGVR).Namespace(argocd).Get(context.Background(), appName2, metav1.GetOptions{}) - return assert.NotNil(t, app) - }, 5*time.Second, time.Second) - - // 5) delete version label - // all applications deleted - (createdObj.Object["metadata"].(map[string]interface{}))["labels"].(map[string]interface{})[versionLabel] = nil - _, err = k8sClient.dynamic(). - Resource(gvr).Namespace(namespace). 
- Update( - context.TODO(), - createdObj, - metav1.UpdateOptions{}, - ) - assert.NoError(t, err) - - // 0 applications - assert.Eventually(t, func() bool { - list, _ := k8sClient.dynamic().Resource(applicationGVR).Namespace(argocd).List(context.Background(), metav1.ListOptions{}) - return assert.Equal(t, len(list.Items), 0) - }, 5*time.Second, time.Second) -} - -func newUnstructuredDeployment(namespace, application, version, track string, additionalLabels map[string]string) *unstructured.Unstructured { - annotations := map[string]interface{}{ - "iter8.tools/ready": "true", - } - if track != "" { - annotations[trackLabel] = track - } - - labels := map[string]interface{}{ - nameLabel: application, - versionLabel: version, - "iter8.tools/ready": "true", - } - - // add additionalLabels to labels - if len(additionalLabels) > 0 { - for labelName, labelValue := range additionalLabels { - labels[labelName] = labelValue - } - } - - return &unstructured.Unstructured{ - Object: map[string]interface{}{ - "apiVersion": "apps/v1", - "kind": "Deployment", - "metadata": map[string]interface{}{ - "namespace": namespace, - "name": application, - "labels": labels, - "annotations": annotations, - }, - "spec": application, - }, - } -} diff --git a/autox/k8sclient.go b/autox/k8sclient.go deleted file mode 100644 index 94314eb51..000000000 --- a/autox/k8sclient.go +++ /dev/null @@ -1,71 +0,0 @@ -package autox - -import ( - "errors" - - "github.com/iter8-tools/iter8/base/log" - - "helm.sh/helm/v3/pkg/cli" - - // Import to initialize client auth plugins. - _ "k8s.io/client-go/plugin/pkg/client/auth" - - "k8s.io/client-go/dynamic" - "k8s.io/client-go/kubernetes" -) - -// kubeClient embeds Kube configuration, and -// enables interaction with a Kubernetes cluster through Kube APIs -type kubeClient struct { - // EnvSettings provides generic Kubernetes options - *cli.EnvSettings - - // clientset enables interaction with a Kubernetes cluster using structured types - clientset kubernetes.Interface - - // dynamicClient enables unstructured interaction with a Kubernetes cluster - dynamicClient dynamic.Interface -} - -// newKubeClient creates an empty KubeClient -func newKubeClient(s *cli.EnvSettings) *kubeClient { - return &kubeClient{ - EnvSettings: s, - // default other fields - } -} - -// init initializes the Kubernetes clientset -func (c *kubeClient) init() (err error) { - if c.dynamicClient == nil { - // get rest config - restConfig, err := c.EnvSettings.RESTClientGetter().ToRESTConfig() - if err != nil { - e := errors.New("unable to get Kubernetes REST config") - log.Logger.WithStackTrace(err.Error()).Error(e) - return e - } - - // get clientset - c.clientset, err = kubernetes.NewForConfig(restConfig) - if err != nil { - e := errors.New("unable to get Kubernetes clientset") - log.Logger.WithStackTrace(err.Error()).Error(e) - return e - } - - // get dynamic client - c.dynamicClient, err = dynamic.NewForConfig(restConfig) - if err != nil { - e := errors.New("unable to get Kubernetes dynamic client") - log.Logger.WithStackTrace(err.Error()).Error(e) - return e - } - } - - return nil -} - -func (c *kubeClient) dynamic() dynamic.Interface { - return c.dynamicClient -} diff --git a/autox/k8sclient_test.go b/autox/k8sclient_test.go deleted file mode 100644 index efb2daeb1..000000000 --- a/autox/k8sclient_test.go +++ /dev/null @@ -1,53 +0,0 @@ -package autox - -import ( - "helm.sh/helm/v3/pkg/cli" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/runtime/schema" - dynamicfake 
"k8s.io/client-go/dynamic/fake" - "k8s.io/client-go/kubernetes/fake" - ktesting "k8s.io/client-go/testing" -) - -// newFakeKubeClient returns a fake Kubernetes client that is able to manage secrets -// Includes dynamic client with Deployments as listed objects -// Used by test cases in several packages to define (global) k8sclient.Client for testing -func newFakeKubeClient(s *cli.EnvSettings, objects ...runtime.Object) *kubeClient { - fakeClient := kubeClient{ - EnvSettings: s, - // default other fields - } - - // secretDataReactor sets the secret.Data field based on the values from secret.StringData - // Credit: this function is adapted from https://github.com/creydr/go-k8s-utils - var secretDataReactor = func(action ktesting.Action) (bool, runtime.Object, error) { - secret, _ := action.(ktesting.CreateAction).GetObject().(*corev1.Secret) - - if secret.Data == nil { - secret.Data = make(map[string][]byte) - } - - for k, v := range secret.StringData { - secret.Data[k] = []byte(v) - } - - return false, nil, nil - } - - fc := fake.NewSimpleClientset(objects...) - fc.PrependReactor("create", "secrets", secretDataReactor) - fc.PrependReactor("update", "secrets", secretDataReactor) - fakeClient.clientset = fc - - // fakeClient.dynamicClient = dynamicfake.NewSimpleDynamicClient(runtime.NewScheme()) - fakeClient.dynamicClient = dynamicfake.NewSimpleDynamicClientWithCustomListKinds( - runtime.NewScheme(), - map[schema.GroupVersionResource]string{ - {Group: "apps", Version: "v1", Resource: "deployments"}: "DeploymentList", - applicationGVR: "ApplicationList", - }, - objects...) - - return &fakeClient -} diff --git a/autox/watcher.go b/autox/watcher.go deleted file mode 100644 index dc3928b7f..000000000 --- a/autox/watcher.go +++ /dev/null @@ -1,91 +0,0 @@ -package autox - -import ( - "fmt" - "os" - - "github.com/iter8-tools/iter8/base/log" - - "helm.sh/helm/v3/pkg/cli" -) - -const ( - // configEnv is the name of environment variable with file path to the config - configEnv = "CONFIG" -) - -var k8sClient *kubeClient - -// validateConfig validates config, which contains all the release group specs -func validateConfig(c config) error { - var err error - - triggerStrings := map[string]bool{} - - // iterate through all the release group specs - for releaseGroupSpecID, releaseGroupSpec := range c.Specs { - // validate trigger - if releaseGroupSpec.Trigger.Name == "" { - err = fmt.Errorf("trigger in spec group \"%s\" does not have a name", releaseGroupSpecID) - break - } - - if releaseGroupSpec.Trigger.Namespace == "" { - err = fmt.Errorf("trigger in spec group \"%s\" does not have a namespace", releaseGroupSpecID) - break - } - - if releaseGroupSpec.Trigger.Version == "" { - err = fmt.Errorf("trigger in spec group \"%s\" does not have a version", releaseGroupSpecID) - break - } - - if releaseGroupSpec.Trigger.Resource == "" { - err = fmt.Errorf("trigger in spec group \"%s\" does not have a resource", releaseGroupSpecID) - break - } - - // check for trigger uniqueness - triggerString := fmt.Sprintf("%s/%s/%s/%s/%s", releaseGroupSpec.Trigger.Name, releaseGroupSpec.Trigger.Namespace, releaseGroupSpec.Trigger.Group, releaseGroupSpec.Trigger.Version, releaseGroupSpec.Trigger.Resource) - if _, ok := triggerStrings[triggerString]; ok { - err = fmt.Errorf("multiple release specs with the same trigger: name: \"%s\", namespace: \"%s\", group: \"%s\", version: \"%s\", resource: \"%s\",", releaseGroupSpec.Trigger.Name, releaseGroupSpec.Trigger.Namespace, releaseGroupSpec.Trigger.Group, 
releaseGroupSpec.Trigger.Version, releaseGroupSpec.Trigger.Resource) - break - } - triggerStrings[triggerString] = true - } - - return err -} - -// Start is entry point to configure services and start them -func Start(stopCh chan struct{}, autoxK *kubeClient) error { - if autoxK == nil { - // get a default client - k8sClient = newKubeClient(cli.New()) - } else { - // set it here - k8sClient = autoxK - } - - // initialize kubernetes driver - if err := k8sClient.init(); err != nil { - log.Logger.Fatal("unable to init k8s client") - } - - // read release group specs - configFile, ok := os.LookupEnv(configEnv) - if !ok { - log.Logger.Fatal("group configuration file is required") - } - config := readConfig(configFile) - - // validate the release group specs - err := validateConfig(config) - if err != nil { - return err - } - - w := newIter8Watcher(config) - go w.start(stopCh) - return nil -} diff --git a/autox/watcher_test.go b/autox/watcher_test.go deleted file mode 100644 index 5dfcee244..000000000 --- a/autox/watcher_test.go +++ /dev/null @@ -1,156 +0,0 @@ -package autox - -import ( - "context" - "fmt" - "os" - "testing" - "time" - - "github.com/stretchr/testify/assert" - "helm.sh/helm/v3/pkg/cli" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime/schema" -) - -func TestStart(t *testing.T) { - // Start() requires some environment variables to be set - _ = os.Setenv(configEnv, "../testdata/autox_inputs/config.example.yaml") - - stopCh := make(chan struct{}) - defer close(stopCh) - _ = Start(stopCh, newFakeKubeClient(cli.New())) - - gvr := schema.GroupVersionResource{ - Group: "apps", - Version: "v1", - Resource: "deployments", - } - namespace := "default" - releaseSpecName := "myApp" - version := "v1" - track := "" - - // create releaseSpec secret - releaseGroupSpecSecret := v1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-secret", - Namespace: argocd, - Labels: map[string]string{ - "iter8.tools/autox-group": releaseSpecName, - }, - }, - } - _, err := k8sClient.clientset.CoreV1().Secrets(argocd).Create(context.Background(), &releaseGroupSpecSecret, metav1.CreateOptions{}) - assert.NoError(t, err) - - createdObj, err := k8sClient.dynamic(). - Resource(gvr).Namespace(namespace). 
- Create( - context.TODO(), - newUnstructuredDeployment( - namespace, - releaseSpecName, - version, - track, - map[string]string{ - // autoXLabel: "true", // add the autoXLabel, which will allow applyApplication() to trigger - }, - ), metav1.CreateOptions{}, - ) - assert.NoError(t, err) - assert.NotNil(t, createdObj) - - // 2 applications - // one for each release spec in the config - // autox-myapp-name1 and autox-myapp-name2 - assert.Eventually(t, func() bool { - list, _ := k8sClient.dynamic().Resource(applicationGVR).Namespace(argocd).List(context.Background(), metav1.ListOptions{}) - return assert.Equal(t, len(list.Items), 2) - }, 5*time.Second, time.Second) -} - -func TestValidateConfig(t *testing.T) { - tests := []struct { - c config - err string - }{ - { - config{ - Specs: map[string]releaseGroupSpec{ - "test": {}, - }, - }, - "trigger in spec group \"test\" does not have a name", - }, - { - config{ - Specs: map[string]releaseGroupSpec{ - "test": { - Trigger: trigger{ - Name: "test", - }, - }, - }, - }, - "trigger in spec group \"test\" does not have a namespace", - }, - { - config{ - Specs: map[string]releaseGroupSpec{ - "test": { - Trigger: trigger{ - Name: "test", - Namespace: "default", - }, - }, - }, - }, - "trigger in spec group \"test\" does not have a version", - }, - { - config{ - Specs: map[string]releaseGroupSpec{ - "test": { - Trigger: trigger{ - Name: "test", - Namespace: "default", - Version: "v1", - }, - }, - }, - }, - "trigger in spec group \"test\" does not have a resource", - }, - { - config{ - Specs: map[string]releaseGroupSpec{ - "test": { - Trigger: trigger{ - Name: "test", - Namespace: "default", - Version: "v1", - Resource: "deployments", - }, - }, - "test2": { - Trigger: trigger{ - Name: "test", - Namespace: "default", - Version: "v1", - Resource: "deployments", - }, - }, - }, - }, - "multiple release specs with the same trigger: name: \"test\", namespace: \"default\", group: \"\", version: \"v1\", resource: \"deployments\",", - }, - } - - for _, e := range tests { - err := validateConfig(e.c) - fmt.Println(err) - assert.EqualError(t, err, e.err) - } -} diff --git a/base/assess.go b/base/assess.go deleted file mode 100644 index 7bc44bcfa..000000000 --- a/base/assess.go +++ /dev/null @@ -1,161 +0,0 @@ -package base - -import ( - "errors" - - "github.com/iter8-tools/iter8/base/log" -) - -// assessInputs contain the inputs to the assess-app-versions task to be executed. 
-type assessInputs struct { - // Rewards are the reward metrics - Rewards *Rewards `json:"rewards,omitempty" yaml:"rewards,omitempty"` - - // SLOs are the SLO limits - SLOs *SLOLimits `json:"SLOs,omitempty" yaml:"SLOs,omitempty"` -} - -// assessTask enables assessment of versions -type assessTask struct { - // TaskMeta has fields common to all tasks - TaskMeta - // With contains the inputs to this task - With assessInputs `json:"with" yaml:"with"` -} - -const ( - // AssessTaskName is the name of the task this file implements - AssessTaskName = "assess" -) - -// initializeDefaults sets default values for task inputs -func (t *assessTask) initializeDefaults() {} - -// validateInputs for this task -func (t *assessTask) validateInputs() error { - return nil -} - -// Run executes the assess-app-versions task -func (t *assessTask) run(exp *Experiment) error { - err := t.validateInputs() - if err != nil { - return err - } - - t.initializeDefaults() - - if exp.Result.Insights == nil { - log.Logger.Error("uninitialized insights within experiment") - return errors.New("uninitialized insights within experiment") - } - if t.With.SLOs == nil || - exp.Result.Insights.NumVersions == 0 { - // do nothing for now - // todo: fix when rewards are introduced - - log.Logger.Warn("nothing to do; returning") - return nil - } - - // set rewards (if needed) - err = exp.Result.Insights.setRewards(t.With.Rewards) - if err != nil { - return err - } - - // set SLOs (if needed) - err = exp.Result.Insights.setSLOs(t.With.SLOs) - if err != nil { - return err - } - - // set initialize SLOsSatisfied (if needed) - err = exp.initializeSLOsSatisfied() - if err != nil { - return err - } - - // set SLOsSatisfied - if t.With.SLOs != nil { - exp.Result.Insights.SLOsSatisfied = &SLOResults{ - Upper: evaluateSLOs(exp, t.With.SLOs.Upper, true), - Lower: evaluateSLOs(exp, t.With.SLOs.Lower, false), - } - } - - // set RewardsWinners - if t.With.Rewards != nil { - exp.Result.Insights.RewardsWinners = &RewardsWinners{ - Max: evaluateRewards(exp, t.With.Rewards.Max, true), - Min: evaluateRewards(exp, t.With.Rewards.Min, false), - } - } - - return err -} - -func evaluateRewards(exp *Experiment, rewards []string, max bool) []int { - winners := make([]int, len(rewards)) - for i := 0; i < len(rewards); i++ { - for j := 0; j < exp.Result.Insights.NumVersions; j++ { - winners[i] = identifyWinner(exp, rewards[i], max) - } - } - return winners -} - -func identifyWinner(e *Experiment, reward string, max bool) int { - currentWinner := -1 - var currentWinningValue *float64 - - for j := 0; j < e.Result.Insights.NumVersions; j++ { - val := e.Result.Insights.ScalarMetricValue(j, reward) - if val == nil { - log.Logger.Warnf("unable to find value for version %v and metric %s", j, reward) - continue - } - if currentWinningValue == nil || (max && *val > *currentWinningValue) || (!max && *val < *currentWinningValue) { - currentWinningValue = val - currentWinner = j - } - } - - return currentWinner -} - -// evaluate SLOs and output the boolean SLO X version matrix -func evaluateSLOs(exp *Experiment, slos []SLO, upper bool) [][]bool { - slosSatisfied := make([][]bool, len(slos)) - for i := 0; i < len(slos); i++ { - slosSatisfied[i] = make([]bool, exp.Result.Insights.NumVersions) - for j := 0; j < exp.Result.Insights.NumVersions; j++ { - slosSatisfied[i][j] = sloSatisfied(exp, slos, i, j, upper) - } - } - return slosSatisfied -} - -// sloSatisfied returns true if SLO i satisfied by version j -func sloSatisfied(e *Experiment, slos []SLO, i int, j int, upper 
bool) bool {
-	val := e.Result.Insights.ScalarMetricValue(j, slos[i].Metric)
-	// check if metric is available
-	if val == nil {
-		log.Logger.Warnf("unable to find value for version %v and metric %s", j, slos[i].Metric)
-		return false
-	}
-
-	if upper {
-		// check upper limit
-		if *val > slos[i].Limit {
-			return false
-		}
-	} else {
-		// check lower limit
-		if *val < slos[i].Limit {
-			return false
-		}
-	}
-
-	return true
-}
diff --git a/base/assess_test.go b/base/assess_test.go
deleted file mode 100644
index bed6128e3..000000000
--- a/base/assess_test.go
+++ /dev/null
@@ -1,44 +0,0 @@
-package base
-
-import (
-	"os"
-	"testing"
-
-	"github.com/stretchr/testify/assert"
-)
-
-// Test a runnable assert condition here
-func TestRunAssess(t *testing.T) {
-	_ = os.Chdir(t.TempDir())
-	// simple assess without any SLOs
-	// should succeed
-	task := &assessTask{
-		TaskMeta: TaskMeta{
-			Task: StringPointer(AssessTaskName),
-		},
-		With: assessInputs{},
-	}
-	exp := &Experiment{
-		Spec: []Task{task},
-	}
-	exp.initResults(1)
-	_ = exp.Result.initInsightsWithNumVersions(1)
-	err := task.run(exp)
-	assert.NoError(t, err)
-
-	// assess with an SLO
-	// should succeed
-	task.With = assessInputs{
-		SLOs: &SLOLimits{
-			Upper: []SLO{{
-				Metric: "a/b",
-				Limit:  20.0,
-			}},
-		},
-		Rewards: &Rewards{
-			Max: []string{"a/b"},
-		},
-	}
-	err = task.run(exp)
-	assert.NoError(t, err)
-}
diff --git a/base/collect_grpc.go b/base/collect_grpc.go
index 60afcedc5..619665d23 100644
--- a/base/collect_grpc.go
+++ b/base/collect_grpc.go
@@ -13,16 +13,6 @@ import (
 const (
 	// CollectGRPCTaskName is the name of this task which performs load generation and metrics collection for gRPC services.
 	CollectGRPCTaskName = "grpc"
-	// gRPC metric prefix
-	gRPCMetricPrefix = "grpc"
-	// gRPCRequestCountMetricName is name of the gRPC request count metric
-	gRPCRequestCountMetricName = "request-count"
-	// gRPCErrorCountMetricName is name of the gRPC error count metric
-	gRPCErrorCountMetricName = "error-count"
-	// gRPCErrorRateMetricName is name of the gRPC error rate metric
-	gRPCErrorRateMetricName = "error-rate"
-	// gRPCLatencySampleMetricName is name of the gRPC latency sample metric
-	gRPCLatencySampleMetricName = "latency"
 	// countErrorsDefault is the default value which indicates if errors are counted
 	countErrorsDefault = true
 	// insucureDefault is the default value which indicates that plaintext and insecure connection should be used
@@ -49,6 +39,11 @@ type collectGRPCTask struct {
 	With collectGRPCInputs `json:"with" yaml:"with"`
 }
 
+// GHZResult is the raw data sent to the metrics server
+// This data will be transformed into a Grafana dashboard when getGHZGrafana is called
+// Key is the endpoint
+type GHZResult map[string]*runner.Report
+
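// As the updated tests below illustrate, consumers recover this raw result
// from experiment insights by round-tripping through JSON; keys are endpoint
// names when multiple endpoints are configured, or the call name otherwise
// (a sketch based on the test code, not an additional API):
//
//	taskData := exp.Result.Insights.TaskData[CollectGRPCTaskName]
//	taskDataBytes, _ := json.Marshal(taskData)
//	ghzResult := GHZResult{}
//	_ = json.Unmarshal(taskDataBytes, &ghzResult)
//	report := ghzResult["helloworld.Greeter.SayHello"] // *runner.Report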
 // initializeDefaults sets default values for the collect task
 func (t *collectGRPCTask) initializeDefaults() {
 	// set defaults
@@ -71,11 +66,11 @@ func (t *collectGRPCTask) validateInputs() error {
 }
 
 // resultForVersion collects gRPC test result for a given version
-func (t *collectGRPCTask) resultForVersion() (map[string]*runner.Report, error) {
+func (t *collectGRPCTask) resultForVersion() (GHZResult, error) {
 	// the main idea is to run ghz with proper options
 	var err error
 
-	results := map[string]*runner.Report{}
+	results := GHZResult{}
 	if len(t.With.Endpoints) > 0 {
 		log.Logger.Trace("multiple endpoints")
@@ -96,7 +91,7 @@ func (t *collectGRPCTask) resultForVersion() (map[string]*runner.Report, error)
 			// merge endpoint options with baseline options
 			if err := mergo.Merge(&endpoint, t.With.Config); err != nil {
-				log.Logger.Error(fmt.Sprintf("could not merge Fortio options for endpoint \"%s\"", endpointID))
+				log.Logger.Error(fmt.Sprintf("could not merge ghz options for endpoint \"%s\"", endpointID))
 				return nil, err
 			}
 			eOpts := runner.WithConfig(&endpoint) // endpoint options
@@ -108,7 +103,7 @@ func (t *collectGRPCTask) resultForVersion() (map[string]*runner.Report, error)
 				continue
 			}
 
-			results[gRPCMetricPrefix+"-"+endpointID] = igr
+			results[endpointID] = igr
 		}
 	} else {
 		// TODO: supply all the allowed options
@@ -121,21 +116,12 @@ func (t *collectGRPCTask) resultForVersion() (map[string]*runner.Report, error)
 			return results, err
 		}
 
-		results[gRPCMetricPrefix] = igr
+		results[t.With.Call] = igr
 	}
 
 	return results, err
 }
 
-// latencySample extracts a latency sample from ghz result details
-func latencySample(rd []runner.ResultDetail) []float64 {
-	f := make([]float64, len(rd))
-	for i := 0; i < len(rd); i++ {
-		f[i] = float64(rd[i].Latency.Milliseconds())
-	}
-	return f
-}
-
 // Run executes this task
 func (t *collectGRPCTask) run(exp *Experiment) error {
 	// 1. initialize defaults
@@ -164,68 +150,13 @@ func (t *collectGRPCTask) run(exp *Experiment) error {
 		return nil
 	}
 
-	// 3. Init insights with num versions: always 1 in this task
+	// 3. init insights with num versions: always 1 in this task
 	if err = exp.Result.initInsightsWithNumVersions(1); err != nil {
 		return err
 	}
-	in := exp.Result.Insights
-
-	// 4. Populate all metrics collected by this task
-	for provider, data := range data {
-		// populate grpc request count
-		// todo: this logic breaks for looped experiments. Fix when we get to loops.
-		m := provider + "/" + gRPCRequestCountMetricName
-		mm := MetricMeta{
-			Description: "number of gRPC requests sent",
-			Type:        CounterMetricType,
-		}
-		if err = in.updateMetric(m, mm, 0, float64(data.Count)); err != nil {
-			return err
-		}
-
-		// populate error count & rate
-		ec := float64(0)
-		for _, count := range data.ErrorDist {
-			ec += float64(count)
-		}
-
-		// populate count
-		// todo: This logic breaks for looped experiments. Fix when we get to loops.
-		m = provider + "/" + gRPCErrorCountMetricName
-		mm = MetricMeta{
-			Description: "number of responses that were errors",
-			Type:        CounterMetricType,
-		}
-		if err = in.updateMetric(m, mm, 0, ec); err != nil {
-			return err
-		}
-
-		// populate rate
-		// todo: This logic breaks for looped experiments. Fix when we get to loops.
-		m = provider + "/" + gRPCErrorRateMetricName
-		rc := float64(data.Count)
-		if rc != 0 {
-			mm = MetricMeta{
-				Description: "fraction of responses that were errors",
-				Type:        GaugeMetricType,
-			}
-			if err = in.updateMetric(m, mm, 0, ec/rc); err != nil {
-				return err
-			}
-		}
-
-		// populate latency sample
-		m = provider + "/" + gRPCLatencySampleMetricName
-		mm = MetricMeta{
-			Description: "gRPC Latency Sample",
-			Type:        SampleMetricType,
-			Units:       StringPointer("msec"),
-		}
-		lh := latencySample(data.Details)
-		if err = in.updateMetric(m, mm, 0, lh); err != nil {
-			return err
-		}
-	}
+	// 4.
write data to Insights + exp.Result.Insights.TaskData[CollectGRPCTaskName] = data return nil } diff --git a/base/collect_grpc_test.go b/base/collect_grpc_test.go index 2e70a9cf3..ad77293db 100644 --- a/base/collect_grpc_test.go +++ b/base/collect_grpc_test.go @@ -1,21 +1,21 @@ package base import ( + "encoding/json" "os" "strings" "testing" - "time" "github.com/bojand/ghz/runner" "github.com/iter8-tools/iter8/base/internal" "github.com/iter8-tools/iter8/base/internal/helloworld/helloworld" "github.com/iter8-tools/iter8/base/log" "github.com/stretchr/testify/assert" - "sigs.k8s.io/yaml" ) const ( unary = "unary" + unary2 = "unary2" server = "server" client = "client" bidirectional = "bidirectional" @@ -24,6 +24,13 @@ const ( // Credit: Several of the tests in this file are based on // https://github.com/bojand/ghz/blob/master/runner/run_test.go func TestRunCollectGRPCUnary(t *testing.T) { + // define METRICS_SERVER_URL + metricsServerURL := "http://iter8.default:8080" + err := os.Setenv(MetricsServerURL, metricsServerURL) + assert.NoError(t, err) + + call := "helloworld.Greeter.SayHello" + _ = os.Chdir(t.TempDir()) callType := helloworld.Unary gs, s, err := internal.StartServer(false) @@ -40,7 +47,7 @@ func TestRunCollectGRPCUnary(t *testing.T) { With: collectGRPCInputs{ Config: runner.Config{ Data: map[string]interface{}{"name": "bob"}, - Call: "helloworld.Greeter.SayHello", + Call: call, Host: internal.LocalHostPort, }, }, @@ -51,6 +58,10 @@ func TestRunCollectGRPCUnary(t *testing.T) { exp := &Experiment{ Spec: []Task{ct}, Result: &ExperimentResult{}, + Metadata: ExperimentMetadata{ + Name: myName, + Namespace: myNamespace, + }, } exp.initResults(1) err = ct.run(exp) @@ -63,21 +74,17 @@ func TestRunCollectGRPCUnary(t *testing.T) { count := gs.GetCount(callType) assert.Equal(t, 200, count) - mm, err := exp.Result.Insights.GetMetricsInfo(gRPCMetricPrefix + "/" + gRPCErrorCountMetricName) - assert.NotNil(t, mm) - assert.NoError(t, err) + taskData := exp.Result.Insights.TaskData[CollectGRPCTaskName] + assert.NotNil(t, taskData) - mm, err = exp.Result.Insights.GetMetricsInfo(gRPCMetricPrefix + "/" + gRPCLatencySampleMetricName) - assert.NotNil(t, mm) + taskDataBytes, err := json.Marshal(taskData) assert.NoError(t, err) - - mm, err = exp.Result.Insights.GetMetricsInfo(gRPCMetricPrefix + "/" + gRPCLatencySampleMetricName + "/" + string(MaxAggregator)) - assert.NotNil(t, mm) + ghzResult := GHZResult{} + err = json.Unmarshal(taskDataBytes, &ghzResult) assert.NoError(t, err) - mm, err = exp.Result.Insights.GetMetricsInfo(gRPCMetricPrefix + "/" + gRPCLatencySampleMetricName + "/" + PercentileAggregatorPrefix + "50") - assert.NotNil(t, mm) - assert.NoError(t, err) + assert.Equal(t, 1, len(ghzResult)) + assert.NotNil(t, ghzResult[call]) } // If the endpoint does not exist, fail gracefully @@ -113,7 +120,12 @@ func TestRunCollectGRPCUnaryNoEndpoint(t *testing.T) { // Credit: Several of the tests in this file are based on // https://github.com/bojand/ghz/blob/master/runner/run_test.go -func TestRunCollectGRPCEndpoints(t *testing.T) { +func TestRunCollectGRPCMultipleEndpoints(t *testing.T) { + // define METRICS_SERVER_URL + metricsServerURL := "http://iter8.default:8080" + err := os.Setenv(MetricsServerURL, metricsServerURL) + assert.NoError(t, err) + _ = os.Chdir(t.TempDir()) callType := helloworld.Unary gs, s, err := internal.StartServer(false) @@ -157,6 +169,10 @@ func TestRunCollectGRPCEndpoints(t *testing.T) { exp := &Experiment{ Spec: []Task{ct}, Result: &ExperimentResult{}, + Metadata: 
ExperimentMetadata{ + Name: myName, + Namespace: myNamespace, + }, } exp.initResults(1) err = ct.run(exp) @@ -169,29 +185,38 @@ func TestRunCollectGRPCEndpoints(t *testing.T) { count := gs.GetCount(callType) assert.Equal(t, 200, count) - grpcMethods := []string{unary, server, client, bidirectional} - for _, method := range grpcMethods { - mm, err := exp.Result.Insights.GetMetricsInfo(gRPCMetricPrefix + "-" + method + "/" + gRPCErrorCountMetricName) - assert.NotNil(t, mm) - assert.NoError(t, err) - - mm, err = exp.Result.Insights.GetMetricsInfo(gRPCMetricPrefix + "-" + method + "/" + gRPCLatencySampleMetricName) - assert.NotNil(t, mm) - assert.NoError(t, err) + taskData := exp.Result.Insights.TaskData[CollectGRPCTaskName] + assert.NotNil(t, taskData) - mm, err = exp.Result.Insights.GetMetricsInfo(gRPCMetricPrefix + "-" + method + "/" + gRPCLatencySampleMetricName + "/" + string(MaxAggregator)) - assert.NotNil(t, mm) - assert.NoError(t, err) + taskDataBytes, err := json.Marshal(taskData) + assert.NoError(t, err) + ghzResult := GHZResult{} + err = json.Unmarshal(taskDataBytes, &ghzResult) + assert.NoError(t, err) - mm, err = exp.Result.Insights.GetMetricsInfo(gRPCMetricPrefix + "-" + method + "/" + gRPCLatencySampleMetricName + "/" + PercentileAggregatorPrefix + "50") - assert.NotNil(t, mm) - assert.NoError(t, err) - } + assert.Equal(t, 4, len(ghzResult)) + assert.NotNil(t, ghzResult[unary]) + assert.NotNil(t, ghzResult[server]) + assert.NotNil(t, ghzResult[client]) + assert.NotNil(t, ghzResult[bidirectional]) } +// TODO: should this still return insights even though the endpoints cannot be reached? +// This would mean no Grafana dashboard would be produced +// // If the endpoints cannot be reached, then do not throw an error // Should not return an nil pointer dereference error (see #1451) func TestRunCollectGRPCMultipleNoEndpoints(t *testing.T) { + // define METRICS_SERVER_URL + metricsServerURL := "http://iter8.default:8080" + err := os.Setenv(MetricsServerURL, metricsServerURL) + assert.NoError(t, err) + + unaryCall := "helloworld.Greeter.SayHello" + serverCall := "helloworld.Greeter.SayHelloCS" + clientCall := "helloworld.Greeter.SayHellos" + bidirectionalCall := "helloworld.Greeter.SayHelloBidi" + // valid collect GRPC task... 
should succeed ct := &collectGRPCTask{ TaskMeta: TaskMeta{ @@ -204,19 +229,19 @@ func TestRunCollectGRPCMultipleNoEndpoints(t *testing.T) { Endpoints: map[string]runner.Config{ unary: { Data: map[string]interface{}{"name": "bob"}, - Call: "helloworld.Greeter.SayHello", + Call: unaryCall, }, server: { Data: map[string]interface{}{"name": "bob"}, - Call: "helloworld.Greeter.SayHelloCS", + Call: serverCall, }, client: { Data: map[string]interface{}{"name": "bob"}, - Call: "helloworld.Greeter.SayHellos", + Call: clientCall, }, bidirectional: { Data: map[string]interface{}{"name": "bob"}, - Call: "helloworld.Greeter.SayHelloBidi", + Call: bidirectionalCall, }, }, }, @@ -227,18 +252,33 @@ func TestRunCollectGRPCMultipleNoEndpoints(t *testing.T) { exp := &Experiment{ Spec: []Task{ct}, Result: &ExperimentResult{}, + Metadata: ExperimentMetadata{ + Name: myName, + Namespace: myNamespace, + }, } exp.initResults(1) - err := ct.run(exp) + err = ct.run(exp) + assert.NoError(t, err) + + taskData := exp.Result.Insights.TaskData[CollectGRPCTaskName] + assert.NotNil(t, taskData) + + taskDataBytes, err := json.Marshal(taskData) + assert.NoError(t, err) + ghzResult := GHZResult{} + err = json.Unmarshal(taskDataBytes, &ghzResult) assert.NoError(t, err) - // No metrics should be collected - assert.Equal(t, 0, len(exp.Result.Insights.NonHistMetricValues[0])) - assert.Equal(t, 0, len(exp.Result.Insights.HistMetricValues[0])) - assert.Equal(t, 0, len(exp.Result.Insights.SummaryMetricValues[0])) + assert.Equal(t, 0, len(ghzResult)) } -func TestMockGRPCWithSLOsAndPercentiles(t *testing.T) { +func TestRunCollectGRPCSingleEndpointMultipleCalls(t *testing.T) { + // define METRICS_SERVER_URL + metricsServerURL := "http://iter8.default:8080" + err := os.Setenv(MetricsServerURL, metricsServerURL) + assert.NoError(t, err) + _ = os.Chdir(t.TempDir()) callType := helloworld.Unary gs, s, err := internal.StartServer(false) @@ -254,76 +294,51 @@ func TestMockGRPCWithSLOsAndPercentiles(t *testing.T) { }, With: collectGRPCInputs{ Config: runner.Config{ - N: 100, - RPS: 20, - C: 1, - Timeout: runner.Duration(20 * time.Second), - Data: map[string]interface{}{"name": "bob"}, - DialTimeout: runner.Duration(20 * time.Second), - Call: "helloworld.Greeter.SayHello", - Host: internal.LocalHostPort, + Host: internal.LocalHostPort, + Call: "helloworld.Greeter.SayHello", }, - }, - } - - at := &assessTask{ - TaskMeta: TaskMeta{ - Task: StringPointer(AssessTaskName), - }, - With: assessInputs{ - SLOs: &SLOLimits{ - Lower: []SLO{{ - Metric: "grpc/request-count", - Limit: 100, - }}, - Upper: []SLO{{ - Metric: "grpc/latency/mean", - Limit: 100, - }, { - Metric: "grpc/latency/p95.00", - Limit: 200, - }, { - Metric: "grpc/latency/stddev", - Limit: 20, - }, { - Metric: "grpc/latency/max", - Limit: 200, - }, { - Metric: "grpc/error-count", - Limit: 0, - }, { - Metric: "grpc/request-count", - Limit: 100, - }}, + Endpoints: map[string]runner.Config{ + unary: { + Data: map[string]interface{}{"name": "bob"}, + }, + unary2: { + Data: map[string]interface{}{"name": "charles"}, + }, }, }, } + + log.Logger.Debug("dial timeout before defaulting... 
", ct.With.DialTimeout.String()) + exp := &Experiment{ - Spec: []Task{ct, at}, + Spec: []Task{ct}, + Result: &ExperimentResult{}, + Metadata: ExperimentMetadata{ + Name: myName, + Namespace: myNamespace, + }, } - exp.initResults(1) - _ = exp.Result.initInsightsWithNumVersions(1) - err = exp.Spec[0].run(exp) - assert.NoError(t, err) - err = exp.Spec[1].run(exp) - assert.NoError(t, err) + err = ct.run(exp) - // assert SLOs are satisfied - for _, v := range exp.Result.Insights.SLOsSatisfied.Upper { - for _, b := range v { - assert.True(t, b) - } - } - for _, v := range exp.Result.Insights.SLOsSatisfied.Lower { - for _, b := range v { - assert.True(t, b) - } - } + log.Logger.Debug("dial timeout after defaulting... ", ct.With.DialTimeout.String()) - expBytes, _ := yaml.Marshal(exp) - log.Logger.Debug("\n" + string(expBytes)) + assert.NoError(t, err) + assert.Equal(t, exp.Result.Insights.NumVersions, 1) count := gs.GetCount(callType) - assert.Equal(t, int(ct.With.N), count) + assert.Equal(t, 400, count) + + taskData := exp.Result.Insights.TaskData[CollectGRPCTaskName] + assert.NotNil(t, taskData) + + taskDataBytes, err := json.Marshal(taskData) + assert.NoError(t, err) + ghzResult := GHZResult{} + err = json.Unmarshal(taskDataBytes, &ghzResult) + assert.NoError(t, err) + + assert.Equal(t, 2, len(ghzResult)) + assert.NotNil(t, ghzResult[unary]) + assert.NotNil(t, ghzResult[unary2]) } diff --git a/base/collect_http.go b/base/collect_http.go index 5dbe91fcb..02394fa35 100644 --- a/base/collect_http.go +++ b/base/collect_http.go @@ -8,7 +8,6 @@ import ( "fortio.org/fortio/fhttp" "fortio.org/fortio/periodic" - "fortio.org/fortio/stats" "github.com/imdario/mergo" log "github.com/iter8-tools/iter8/base/log" ) @@ -59,6 +58,11 @@ type collectHTTPInputs struct { Endpoints map[string]endpoint `json:"endpoints" yaml:"endpoints"` } +// HTTPResult is the raw data sent to the metrics server +// This data will be transformed into httpDashboard when getHTTPGrafana is called +// Key is the endpoint +type HTTPResult map[string]*fhttp.HTTPRunnerResults + const ( // CollectHTTPTaskName is the name of this task which performs load generation and metrics collection. 
CollectHTTPTaskName = "http" @@ -68,20 +72,6 @@ const ( defaultHTTPNumRequests = int64(100) // defaultHTTPConnections is the default number of connections (parallel go routines) defaultHTTPConnections = 4 - // httpMetricPrefix is the prefix for all metrics collected by this task - httpMetricPrefix = "http" - // the following are a list of names for metrics collected by this task - builtInHTTPRequestCountID = "request-count" - builtInHTTPErrorCountID = "error-count" - builtInHTTPErrorRateID = "error-rate" - builtInHTTPLatencyMeanID = "latency-mean" - builtInHTTPLatencyStdDevID = "latency-stddev" - builtInHTTPLatencyMinID = "latency-min" - builtInHTTPLatencyMaxID = "latency-max" - builtInHTTPLatencyHistID = "latency" - // prefix used in latency percentile metric names - // example: latency-p75.0 is the 75th percentile latency - builtInHTTPLatencyPercentilePrefix = "latency-p" ) var ( @@ -221,11 +211,12 @@ func getFortioOptions(c endpoint) (*fhttp.HTTPRunnerOptions, error) { // getFortioResults collects Fortio run results // func (t *collectHTTPTask) getFortioResults() (*fhttp.HTTPRunnerResults, error) { // key is the metric prefix -func (t *collectHTTPTask) getFortioResults() (map[string]*fhttp.HTTPRunnerResults, error) { +// key is the endpoint +func (t *collectHTTPTask) getFortioResults() (HTTPResult, error) { // the main idea is to run Fortio with proper options var err error - results := map[string]*fhttp.HTTPRunnerResults{} + results := HTTPResult{} if len(t.With.Endpoints) > 0 { log.Logger.Trace("multiple endpoints") for endpointID, endpoint := range t.With.Endpoints { @@ -254,7 +245,7 @@ func (t *collectHTTPTask) getFortioResults() (map[string]*fhttp.HTTPRunnerResult continue } - results[httpMetricPrefix+"-"+endpointID] = ifr + results[endpointID] = ifr } } else { fo, err := getFortioOptions(t.With.endpoint) @@ -270,10 +261,10 @@ func (t *collectHTTPTask) getFortioResults() (map[string]*fhttp.HTTPRunnerResult ifr, err := fhttp.RunHTTPTest(fo) if err != nil { log.Logger.WithStackTrace(err.Error()).Error("fortio failed") - return results, err + return nil, err } - results[httpMetricPrefix] = ifr + results[t.With.URL] = ifr } return results, err @@ -306,131 +297,9 @@ func (t *collectHTTPTask) run(exp *Experiment) error { if err != nil { return err } - in := exp.Result.Insights - - for provider, data := range data { - // request count - m := provider + "/" + builtInHTTPRequestCountID - mm := MetricMeta{ - Description: "number of requests sent", - Type: CounterMetricType, - } - if err = in.updateMetric(m, mm, 0, float64(data.DurationHistogram.Count)); err != nil { - return err - } - - // error count & rate - val := float64(0) - for code, count := range data.RetCodes { - if t.errorCode(code) { - val += float64(count) - } - } - // error count - m = provider + "/" + builtInHTTPErrorCountID - mm = MetricMeta{ - Description: "number of responses that were errors", - Type: CounterMetricType, - } - if err = in.updateMetric(m, mm, 0, val); err != nil { - return err - } - // error-rate - m = provider + "/" + builtInHTTPErrorRateID - rc := float64(data.DurationHistogram.Count) - if rc != 0 { - mm = MetricMeta{ - Description: "fraction of responses that were errors", - Type: GaugeMetricType, - } - if err = in.updateMetric(m, mm, 0, val/rc); err != nil { - return err - } - } - - // mean-latency - m = provider + "/" + builtInHTTPLatencyMeanID - mm = MetricMeta{ - Description: "mean of observed latency values", - Type: GaugeMetricType, - Units: StringPointer("msec"), - } - if err = in.updateMetric(m, 
mm, 0, 1000.0*data.DurationHistogram.Avg); err != nil { - return err - } - - // stddev-latency - m = provider + "/" + builtInHTTPLatencyStdDevID - mm = MetricMeta{ - Description: "standard deviation of observed latency values", - Type: GaugeMetricType, - Units: StringPointer("msec"), - } - if err = in.updateMetric(m, mm, 0, 1000.0*data.DurationHistogram.StdDev); err != nil { - return err - } - - // min-latency - m = provider + "/" + builtInHTTPLatencyMinID - mm = MetricMeta{ - Description: "minimum of observed latency values", - Type: GaugeMetricType, - Units: StringPointer("msec"), - } - if err = in.updateMetric(m, mm, 0, 1000.0*data.DurationHistogram.Min); err != nil { - return err - } - - // max-latency - m = provider + "/" + builtInHTTPLatencyMaxID - mm = MetricMeta{ - Description: "maximum of observed latency values", - Type: GaugeMetricType, - Units: StringPointer("msec"), - } - if err = in.updateMetric(m, mm, 0, 1000.0*data.DurationHistogram.Max); err != nil { - return err - } - - // percentiles - for _, p := range data.DurationHistogram.Percentiles { - m = fmt.Sprintf("%v/%v%v", provider, builtInHTTPLatencyPercentilePrefix, p.Percentile) - mm = MetricMeta{ - Description: fmt.Sprintf("%v-th percentile of observed latency values", p.Percentile), - Type: GaugeMetricType, - Units: StringPointer("msec"), - } - if err = in.updateMetric(m, mm, 0, 1000.0*p.Value); err != nil { - return err - } - } - - // latency histogram - m = httpMetricPrefix + "/" + builtInHTTPLatencyHistID - mm = MetricMeta{ - Description: "Latency Histogram", - Type: HistogramMetricType, - Units: StringPointer("msec"), - } - lh := latencyHist(data.DurationHistogram) - if err = in.updateMetric(m, mm, 0, lh); err != nil { - return err - } - } + // write data to Insights + exp.Result.Insights.TaskData[CollectHTTPTaskName] = data return nil } - -// compute latency histogram by resampling -func latencyHist(hd *stats.HistogramData) []HistBucket { - buckets := []HistBucket{} - for _, v := range hd.Data { - buckets = append(buckets, HistBucket{ - Lower: v.Start * 1000.0, // sec to msec - Upper: v.End * 1000.0, - Count: uint64(v.Count), - }) - } - return buckets -} diff --git a/base/collect_http_test.go b/base/collect_http_test.go index 7a3775702..1c2900809 100644 --- a/base/collect_http_test.go +++ b/base/collect_http_test.go @@ -2,6 +2,7 @@ package base import ( "bytes" + "encoding/json" "fmt" "io" "net/http" @@ -19,9 +20,17 @@ const ( foo = "foo" bar = "bar" from = "from" + + myName = "myName" + myNamespace = "myNamespace" ) func TestRunCollectHTTP(t *testing.T) { + // define METRICS_SERVER_URL + metricsServerURL := "http://iter8.default:8080" + err := os.Setenv(MetricsServerURL, metricsServerURL) + assert.NoError(t, err) + mux, addr := fhttp.DynamicHTTPServer(false) // /foo/ handler @@ -38,7 +47,7 @@ func TestRunCollectHTTP(t *testing.T) { } mux.HandleFunc("/"+foo, handler) - baseURL := fmt.Sprintf("http://localhost:%d/", addr.Port) + url := fmt.Sprintf("http://localhost:%d/", addr.Port) + foo // valid collect HTTP task... 
should succeed ct := &collectHTTPTask{ @@ -50,7 +59,7 @@ func TestRunCollectHTTP(t *testing.T) { Duration: StringPointer("1s"), PayloadFile: StringPointer(CompletePath("../", "testdata/payload/ukpolice.json")), Headers: map[string]string{}, - URL: baseURL + foo, + URL: url, }, }, } @@ -58,20 +67,28 @@ func TestRunCollectHTTP(t *testing.T) { exp := &Experiment{ Spec: []Task{ct}, Result: &ExperimentResult{}, + Metadata: ExperimentMetadata{ + Name: myName, + Namespace: myNamespace, + }, } exp.initResults(1) - err := ct.run(exp) + err = ct.run(exp) assert.NoError(t, err) assert.True(t, called) // ensure that the /foo/ handler is called assert.Equal(t, exp.Result.Insights.NumVersions, 1) - mm, err := exp.Result.Insights.GetMetricsInfo(httpMetricPrefix + "/" + builtInHTTPLatencyMeanID) - assert.NotNil(t, mm) - assert.NoError(t, err) + taskData := exp.Result.Insights.TaskData[CollectHTTPTaskName] + assert.NotNil(t, taskData) - mm, err = exp.Result.Insights.GetMetricsInfo(httpMetricPrefix + "/" + builtInHTTPLatencyPercentilePrefix + "50") - assert.NotNil(t, mm) + taskDataBytes, err := json.Marshal(taskData) + assert.NoError(t, err) + httpResult := HTTPResult{} + err = json.Unmarshal(taskDataBytes, &httpResult) assert.NoError(t, err) + + assert.Equal(t, 1, len(httpResult)) + assert.NotNil(t, httpResult[url]) } // If the endpoint does not exist, fail gracefully @@ -110,6 +127,11 @@ func TestRunCollectHTTPNoEndpoint(t *testing.T) { // Test both the /foo/ and /bar/ endpoints // Test both endpoints have their respective header values func TestRunCollectHTTPMultipleEndpoints(t *testing.T) { + // define METRICS_SERVER_URL + metricsServerURL := "http://iter8.default:8080" + err := os.Setenv(MetricsServerURL, metricsServerURL) + assert.NoError(t, err) + mux, addr := fhttp.DynamicHTTPServer(false) // /foo/ handler @@ -137,6 +159,10 @@ func TestRunCollectHTTPMultipleEndpoints(t *testing.T) { mux.HandleFunc("/"+bar, barHandler) baseURL := fmt.Sprintf("http://localhost:%d/", addr.Port) + endpoint1 := "endpoint1" + endpoint2 := "endpoint2" + endpoint1URL := baseURL + foo + endpoint2URL := baseURL + bar // valid collect HTTP task... 
should succeed ct := &collectHTTPTask{ @@ -149,13 +175,13 @@ func TestRunCollectHTTPMultipleEndpoints(t *testing.T) { }, Endpoints: map[string]endpoint{ endpoint1: { - URL: baseURL + foo, + URL: endpoint1URL, Headers: map[string]string{ from: foo, }, }, endpoint2: { - URL: baseURL + bar, + URL: endpoint2URL, Headers: map[string]string{ from: bar, }, @@ -167,35 +193,41 @@ func TestRunCollectHTTPMultipleEndpoints(t *testing.T) { exp := &Experiment{ Spec: []Task{ct}, Result: &ExperimentResult{}, + Metadata: ExperimentMetadata{ + Name: myName, + Namespace: myNamespace, + }, } exp.initResults(1) - err := ct.run(exp) + err = ct.run(exp) assert.NoError(t, err) assert.True(t, fooCalled) // ensure that the /foo/ handler is called assert.True(t, barCalled) // ensure that the /bar/ handler is called assert.Equal(t, exp.Result.Insights.NumVersions, 1) - mm, err := exp.Result.Insights.GetMetricsInfo(httpMetricPrefix + "-" + endpoint1 + "/" + builtInHTTPLatencyMeanID) - assert.NotNil(t, mm) - assert.NoError(t, err) + taskData := exp.Result.Insights.TaskData[CollectHTTPTaskName] + assert.NotNil(t, taskData) - mm, err = exp.Result.Insights.GetMetricsInfo(httpMetricPrefix + "-" + endpoint1 + "/" + builtInHTTPLatencyPercentilePrefix + "50") - assert.NotNil(t, mm) + taskDataBytes, err := json.Marshal(taskData) assert.NoError(t, err) - - mm, err = exp.Result.Insights.GetMetricsInfo(httpMetricPrefix + "-" + endpoint2 + "/" + builtInHTTPLatencyMeanID) - assert.NotNil(t, mm) + httpResult := HTTPResult{} + err = json.Unmarshal(taskDataBytes, &httpResult) assert.NoError(t, err) - mm, err = exp.Result.Insights.GetMetricsInfo(httpMetricPrefix + "-" + endpoint2 + "/" + builtInHTTPLatencyPercentilePrefix + "50") - assert.NotNil(t, mm) - assert.NoError(t, err) + assert.Equal(t, 2, len(httpResult)) + assert.NotNil(t, httpResult[endpoint1]) + assert.NotNil(t, httpResult[endpoint2]) } // Multiple endpoints are provided but they share one URL // Test that the base-level URL is provided to each endpoint // Make multiple calls to the same URL but with different headers func TestRunCollectHTTPSingleEndpointMultipleCalls(t *testing.T) { + // define METRICS_SERVER_URL + metricsServerURL := "http://iter8.default:8080" + err := os.Setenv(MetricsServerURL, metricsServerURL) + assert.NoError(t, err) + mux, addr := fhttp.DynamicHTTPServer(false) // handler @@ -214,6 +246,8 @@ func TestRunCollectHTTPSingleEndpointMultipleCalls(t *testing.T) { mux.HandleFunc("/", fooHandler) baseURL := fmt.Sprintf("http://localhost:%d/", addr.Port) + endpoint1 := "endpoint1" + endpoint2 := "endpoint2" // valid collect HTTP task... 
should succeed ct := &collectHTTPTask{ @@ -243,37 +277,48 @@ func TestRunCollectHTTPSingleEndpointMultipleCalls(t *testing.T) { exp := &Experiment{ Spec: []Task{ct}, Result: &ExperimentResult{}, + Metadata: ExperimentMetadata{ + Name: myName, + Namespace: myNamespace, + }, } exp.initResults(1) - err := ct.run(exp) + err = ct.run(exp) assert.NoError(t, err) assert.True(t, fooCalled) // ensure that the /foo/ handler is called assert.True(t, barCalled) // ensure that the /bar/ handler is called assert.Equal(t, exp.Result.Insights.NumVersions, 1) - mm, err := exp.Result.Insights.GetMetricsInfo(httpMetricPrefix + "-" + endpoint1 + "/" + builtInHTTPLatencyMeanID) - assert.NotNil(t, mm) - assert.NoError(t, err) + taskData := exp.Result.Insights.TaskData[CollectHTTPTaskName] + assert.NotNil(t, taskData) - mm, err = exp.Result.Insights.GetMetricsInfo(httpMetricPrefix + "-" + endpoint1 + "/" + builtInHTTPLatencyPercentilePrefix + "50") - assert.NotNil(t, mm) + taskDataBytes, err := json.Marshal(taskData) assert.NoError(t, err) - - mm, err = exp.Result.Insights.GetMetricsInfo(httpMetricPrefix + "-" + endpoint2 + "/" + builtInHTTPLatencyMeanID) - assert.NotNil(t, mm) + httpResult := HTTPResult{} + err = json.Unmarshal(taskDataBytes, &httpResult) assert.NoError(t, err) - mm, err = exp.Result.Insights.GetMetricsInfo(httpMetricPrefix + "-" + endpoint2 + "/" + builtInHTTPLatencyPercentilePrefix + "50") - assert.NotNil(t, mm) - assert.NoError(t, err) + assert.Equal(t, 2, len(httpResult)) + assert.NotNil(t, httpResult[endpoint1]) + assert.NotNil(t, httpResult[endpoint2]) } +// TODO: should this still return insights even though the endpoints cannot be reached? +// This would mean no Grafana dashboard would be produced +// // If the endpoints cannot be reached, then do not throw an error // Should not return an nil pointer dereference error (see #1451) func TestRunCollectHTTPMultipleNoEndpoints(t *testing.T) { + // define METRICS_SERVER_URL + metricsServerURL := "http://iter8.default:8080" + err := os.Setenv(MetricsServerURL, metricsServerURL) + assert.NoError(t, err) + _, addr := fhttp.DynamicHTTPServer(false) baseURL := fmt.Sprintf("http://localhost:%d/", addr.Port) + endpoint1URL := baseURL + foo + endpoint2URL := baseURL + bar // valid collect HTTP task... 
should succeed ct := &collectHTTPTask{ @@ -286,13 +331,13 @@ func TestRunCollectHTTPMultipleNoEndpoints(t *testing.T) { }, Endpoints: map[string]endpoint{ endpoint1: { - URL: baseURL + foo, + URL: endpoint1URL, Headers: map[string]string{ from: foo, }, }, endpoint2: { - URL: baseURL + bar, + URL: endpoint2URL, Headers: map[string]string{ from: bar, }, @@ -304,15 +349,25 @@ func TestRunCollectHTTPMultipleNoEndpoints(t *testing.T) { exp := &Experiment{ Spec: []Task{ct}, Result: &ExperimentResult{}, + Metadata: ExperimentMetadata{ + Name: myName, + Namespace: myNamespace, + }, } exp.initResults(1) - err := ct.run(exp) + err = ct.run(exp) + assert.NoError(t, err) + + taskData := exp.Result.Insights.TaskData[CollectHTTPTaskName] + assert.NotNil(t, taskData) + + taskDataBytes, err := json.Marshal(taskData) + assert.NoError(t, err) + httpResult := HTTPResult{} + err = json.Unmarshal(taskDataBytes, &httpResult) assert.NoError(t, err) - // No metrics should be collected - assert.Equal(t, 0, len(exp.Result.Insights.NonHistMetricValues[0])) - assert.Equal(t, 0, len(exp.Result.Insights.HistMetricValues[0])) - assert.Equal(t, 0, len(exp.Result.Insights.SummaryMetricValues[0])) + assert.Equal(t, 0, len(httpResult)) } func TestErrorCode(t *testing.T) { diff --git a/base/custom_metrics.go b/base/custom_metrics.go deleted file mode 100644 index 0201989df..000000000 --- a/base/custom_metrics.go +++ /dev/null @@ -1,341 +0,0 @@ -package base - -import ( - "bytes" - "encoding/json" - "errors" - "fmt" - "io" - "math" - "net/http" - "strconv" - "strings" - - "time" - - "github.com/itchyny/gojq" - log "github.com/iter8-tools/iter8/base/log" - "sigs.k8s.io/yaml" -) - -// ProviderSpec specifies how to get metrics from a provider -type ProviderSpec struct { - // URL is the database endpoint - URL string `json:"url" yaml:"url"` - - // Method is the HTTP method that needs to be used - Method string `json:"method" yaml:"method"` - - // Headers is the set of HTTP headers that need to be sent - Headers map[string]string `json:"headers" yaml:"headers"` - - // Metrics is the set of metrics that can be obtained - Metrics []Metric `json:"metrics" yaml:"metrics"` -} - -// Metric defines how to construct HTTP requests and parse HTTP responses -// when querying a database for a metric. Metric struct also includes metadata -// such as the name and description of the metric. 
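// NOTE (editor): to make the removed spec concrete, here is a minimal,
// hypothetical instance of the ProviderSpec/Metric types defined just below.
// The URL, metric name, query, and jq expression are illustrative only — a
// sketch of how these fields fit together, not an example taken from the
// iter8 codebase:
//
//	spec := ProviderSpec{
//	    URL:    "http://prometheus.example:9090/api/v1/query", // hypothetical endpoint
//	    Method: "GET",
//	    Metrics: []Metric{{
//	        Name:         "request-count",
//	        Type:         "counter",
//	        Params:       &[]HTTPParam{{Name: "query", Value: "sum(requests_total)"}},
//	        JqExpression: ".data.result[0].value[1] | tonumber", // extract the value from the JSON response
//	    }},
//	}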
-type Metric struct { - // Name is the name of the metric - Name string `json:"name" yaml:"name"` - - // Description is the description of the metric - Description *string `json:"description,omitempty" yaml:"description,omitempty"` - - // Type is the type of the metric, either gauge or counter - Type string `json:"type" yaml:"type"` - - // Units is the unit of the metric, which can be omitted for unitless metrics - Units *string `json:"units,omitempty" yaml:"units,omitempty"` - - // Params is the set of HTTP parameters that need to be sent - Params *[]HTTPParam `json:"params,omitempty" yaml:"params,omitempty"` - - // Body is the HTTP request body that needs to be sent - Body *string `json:"body,omitempty" yaml:"body,omitempty"` - - // JqExpression is the jq expression that can extract the value from the HTTP - // response - JqExpression string `json:"jqExpression" yaml:"jqExpression"` -} - -// HTTPParam defines an HTTP parameter -type HTTPParam struct { - // Name is the name of the HTTP parameter - Name string `json:"name" yaml:"name"` - - // Value is the value of the HTTP parameter - Value string `json:"value" yaml:"value"` -} - -// customMetricsInputs is the input to the custommetrics task -type customMetricsInputs struct { - // Template maps the provider to its template URL - Templates map[string]string `json:"templates" yaml:"templates"` - - // Values is used for substituting placeholders in metric templates. - Values map[string]interface{} `json:"values" yaml:"values"` - - // VersionValues are per version values that override values - // For each version, its version values are coalesced with values - // The length of this slice equals the number of versions - VersionValues []map[string]interface{} `json:"versionValues" yaml:"versionValues"` -} - -const ( - // CustomMetricsTaskName is the name of this task which fetches metrics templates, constructs metric specs, and then fetches metrics for each version from metric provider databases - CustomMetricsTaskName = "custommetrics" - - // startingTime specifies how far back to go in time for a specific version - // startingTimeStr is starting time placeholder - startingTimeStr = "startingTime" - - // how much time has elapsed between startingTime and now - elapsedTimeSecondsStr = "elapsedTimeSeconds" -) - -// customMetricsTask enables collection of custom metrics from databases -type customMetricsTask struct { - TaskMeta - With customMetricsInputs `json:"with" yaml:"with"` -} - -// initializeDefaults sets default values for the custom metrics task -func (t *customMetricsTask) initializeDefaults() { - // initialize versionValues if absent - if len(t.With.VersionValues) == 0 { - t.With.VersionValues = []map[string]interface{}{t.With.Values} - } -} - -// validate task inputs -func (t *customMetricsTask) validateInputs() error { - return nil -} - -// getElapsedTimeSeconds using values and experiment -// -// elapsedTime = currentTime - startingTime -// -// First, check for startingTime in values. -// If unavailable, use startingTime of the experiment. 
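// NOTE (editor): a short usage sketch of the fallback order described above,
// mirroring the removed TestGetElapsedTimeSeconds; the map contents are
// illustrative:
//
//	values := map[string]interface{}{"startingTime": "2020-02-01T09:44:40Z"} // overrides the experiment start time
//	elapsed, err := getElapsedTimeSeconds(values, exp)
//	// with no "startingTime" key, elapsed is measured from exp.Result.StartTime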
-func getElapsedTimeSeconds(values map[string]interface{}, exp *Experiment) (int64, error) { - startingTime := exp.Result.StartTime.Time - if values[startingTimeStr] != nil { - var err error - // Calling Parse() method with its parameters - startingTime, err = time.Parse(time.RFC3339, fmt.Sprintf("%v", values[startingTimeStr])) - - if err != nil { - return 0, errors.New("cannot parse startingTime") - } - } - - // calculate the elapsedTimeSeconds based on the startingTime if it has been provided - currentTime := time.Now() - return int64(currentTime.Sub(startingTime).Seconds()), nil -} - -// construct request to database and return extracted metric value -// -// bool return value represents whether the pipeline was able to run to -// completion (prevents double error statement) -func queryDatabaseAndGetValue(template ProviderSpec, metric Metric) (interface{}, bool) { - var requestBody io.Reader - if metric.Body != nil { - requestBody = strings.NewReader(*metric.Body) - } - - // create a new HTTP request - req, err := http.NewRequest(template.Method, template.URL, requestBody) - if err != nil { - log.Logger.Error("could not create new request for metric ", metric.Name, ": ", err) - return nil, false - } - - // iterate through headers - for headerName, headerValue := range template.Headers { - req.Header.Add(headerName, headerValue) - log.Logger.Debug("add header: ", headerName, ", value: ", headerValue) - } - req.Header.Add("Content-Type", "application/json;charset=utf-8") - - // add query params - q := req.URL.Query() - params := metric.Params - for _, param := range *params { - q.Add(param.Name, param.Value) - log.Logger.Debug("add param: ", param.Name, ", value: ", param.Value) - } - req.URL.RawQuery = q.Encode() - - // send request - client := &http.Client{} - resp, err := client.Do(req) - if err != nil { - log.Logger.Error("could not request metric ", metric.Name, ": ", err.Error()) - return nil, false - } - defer func() { - _ = resp.Body.Close() - }() - - // read response responseBody - responseBody, err := io.ReadAll(resp.Body) - if err != nil { - log.Logger.Error("could not read response body for metric ", metric.Name, ": ", err) - return nil, false - } - - log.Logger.Debug("response body: ", string(responseBody)) - - // JSON parse response body - var jsonBody interface{} - err = json.Unmarshal([]byte(responseBody), &jsonBody) - if err != nil { - log.Logger.Error("could not JSON parse response body for metric ", metric.Name, ": ", err) - return nil, false - } - - // perform jq expression - query, err := gojq.Parse(metric.JqExpression) - if err != nil { - log.Logger.Error("could not parse jq expression \""+metric.JqExpression+"\" for metric ", metric.Name, ": ", err) - return nil, false - } - iter := query.Run(jsonBody) - - value, ok := iter.Next() - if !ok { - log.Logger.Error("could not extract value with jq expression for metric ", metric.Name, ": ", err) - return nil, false - } - - return value, true -} - -// run executes this task -func (t *customMetricsTask) run(exp *Experiment) error { - // validate inputs - var err error - - err = t.validateInputs() - if err != nil { - return err - } - - // initialize defaults - t.initializeDefaults() - - err = exp.Result.initInsightsWithNumVersions(len(t.With.VersionValues)) - if err != nil { - return err - } - - // collect metrics from all providers and for all versions - for providerName, url := range t.With.Templates { - // finalize metrics spec - template, err := getTextTemplateFromURL(url) - if err != nil { - return err - } - - for i, 
versionValues := range t.With.VersionValues { - // merge values - vals, err := mustMergeOverwrite(t.With.Values, versionValues) - if err != nil { - return err - } - values := vals.(map[string]interface{}) - if len(values) == 0 { - values = make(map[string]interface{}) - } - // add elapsedTimeSeconds - elapsedTimeSeconds, err := getElapsedTimeSeconds(values, exp) - if err != nil { - return err - } - values[elapsedTimeSecondsStr] = elapsedTimeSeconds - - // get the metrics spec - var buf bytes.Buffer - err = template.Execute(&buf, values) - if err != nil { - log.Logger.Error("cannot execute metrics spec with values", err) - log.Logger.Error("metrics spec: ", buf.String()) - log.Logger.Error("values: ", values) - return err - } - - bytes, _ := io.ReadAll(&buf) - var provider ProviderSpec - err = yaml.Unmarshal(bytes, &provider) - if err != nil { - log.Logger.Error("cannot unmarshal provider spec", err) - log.Logger.Error("provider spec: ", string(bytes)) - return err - } - log.Logger.Debugf("provider spec %v for version %v\n", providerName, i) - log.Logger.Debug("--------------------------------") - log.Logger.Debug(string(bytes)) - - // get each metric - for _, metric := range provider.Metrics { - log.Logger.Debug("query for metric ", metric.Name) - - // perform database query and extract metric value - val, ok := queryDatabaseAndGetValue(provider, metric) - - // check if there were any issues querying database and extracting value - if !ok { - log.Logger.Error("could not query for metric ", metric.Name) - continue - } - - // do not save value if it has no value - if val == nil { - log.Logger.Error("could not extract non-nil value for metric ", metric.Name) - continue - } - - // determine metric type - var metricType MetricType - if metric.Type == "gauge" { - metricType = GaugeMetricType - } else if metric.Type == "counter" { - metricType = CounterMetricType - } - - // finalize metric data - mm := MetricMeta{ - Description: *metric.Description, - Type: metricType, - Units: metric.Units, - } - - // convert value to float - valueString := fmt.Sprint(val) - floatValue, err := strconv.ParseFloat(valueString, 64) - if err != nil { - log.Logger.Error("could not parse string \""+valueString+"\" to float: ", err) - continue - } - - if math.IsNaN(floatValue) { - log.Logger.Debug("metric value is NaN", errors.New("metric value is NaN - ignored")) - continue - } - - err = exp.Result.Insights.updateMetric(providerName+"/"+metric.Name, mm, i, floatValue) - - if err != nil { - log.Logger.Error("could not add update metric", err) - continue - } - } - } - } - - return nil -} diff --git a/base/custom_metrics_test.go b/base/custom_metrics_test.go deleted file mode 100644 index b1e04cad5..000000000 --- a/base/custom_metrics_test.go +++ /dev/null @@ -1,859 +0,0 @@ -package base - -import ( - "errors" - "io" - "net/http" - "net/url" - "os" - "strings" - "testing" - - "github.com/jarcoal/httpmock" - "github.com/stretchr/testify/assert" -) - -const ( - queryString = "?query=" - exampleQueryParameter = "example query parameter\n" - exampleRequestBody = "example request body\n" - - // the provider URL is mocked - cePromProviderURL = "https://raw.githubusercontent.com/iter8-tools/iter8/master/testdata/metrics/test-ce.metrics.yaml" - testCE = "test-ce" - testCEPromURL = `test-database.com/prometheus/api/v1/query` - testCERequestCount = "sum(last_over_time(ibm_codeengine_application_requests_total{\n" + - "}[0s])) or on() vector(0)\n" - testCEErrorCount = 
"sum(last_over_time(ibm_codeengine_application_requests_total{\n" + - " ibm_codeengine_status!=\"200\",\n" + - "}[0s])) or on() vector(0)\n" - testCEErrorRate = "sum(last_over_time(ibm_codeengine_application_requests_total{\n" + - " ibm_codeengine_status!=\"200\",\n" + - "}[0s])) or on() vector(0)/sum(last_over_time(ibm_codeengine_application_requests_total{\n" + - "}[0s])) or on() vector(0)\n" - testCERequestCountWithRevisionName = "sum(last_over_time(ibm_codeengine_application_requests_total{\n" + - " ibm_codeengine_revision_name=\"v1\",\n" + - "}[0s])) or on() vector(0)\n" - testCEErrorCountWithRevisionName = "sum(last_over_time(ibm_codeengine_application_requests_total{\n" + - " ibm_codeengine_status!=\"200\",\n" + - " ibm_codeengine_revision_name=\"v1\",\n" + - "}[0s])) or on() vector(0)\n" - testCEErrorRateWithRevisionName = "sum(last_over_time(ibm_codeengine_application_requests_total{\n" + - " ibm_codeengine_status!=\"200\",\n" + - " ibm_codeengine_revision_name=\"v1\",\n" + - "}[0s])) or on() vector(0)/sum(last_over_time(ibm_codeengine_application_requests_total{\n" + - " ibm_codeengine_revision_name=\"v1\",\n" + - "}[0s])) or on() vector(0)\n" - - // the provider URL is mocked - testProviderURL = "https://raw.githubusercontent.com/iter8-tools/iter8/master/testdata/metrics/test-request-body.metrics.yaml" - testRequestBody = "test-request-body" - - // the provider URL is mocked - istioPromProviderURL = "https://raw.githubusercontent.com/iter8-tools/iter8/master/custommetrics/istio-prom.tpl" - - istioPromRequestCount = "sum(last_over_time(istio_requests_total{\n" + - " destination_workload=\"myApp\",\n" + - " destination_workload_namespace=\"production\",\n" + - " reporter=\"destination\",\n" + - "}[0s])) or on() vector(0)" - istioPromErrorCount = "sum(last_over_time(istio_requests_total{\n" + - " response_code=~'5..',\n" + - " destination_workload=\"myApp\",\n" + - " destination_workload_namespace=\"production\",\n" + - " reporter=\"destination\",\n" + - "}[0s])) or on() vector(0)" - istioPromErrorRate = "(sum(last_over_time(istio_requests_total{\n" + - " response_code=~'5..',\n" + - " destination_workload=\"myApp\",\n" + - " destination_workload_namespace=\"production\",\n" + - " reporter=\"destination\",\n" + - "}[0s])) or on() vector(0))/(sum(last_over_time(istio_requests_total{\n" + - " destination_workload=\"myApp\",\n" + - " destination_workload_namespace=\"production\",\n" + - " reporter=\"destination\",\n" + - "}[0s])) or on() vector(0))" - istioPromMeanLatency = "(sum(last_over_time(istio_request_duration_milliseconds_sum{\n" + - " destination_workload=\"myApp\",\n" + - " destination_workload_namespace=\"production\",\n" + - " reporter=\"destination\",\n" + - "}[0s])) or on() vector(0))/(sum(last_over_time(istio_requests_total{\n" + - " destination_workload=\"myApp\",\n" + - " destination_workload_namespace=\"production\",\n" + - " reporter=\"destination\",\n" + - "}[0s])) or on() vector(0))" - istioPromLatencyP90 = "histogram_quantile(0.90, sum(rate(istio_request_duration_milliseconds_bucket{\n" + - " destination_workload=\"myApp\",\n" + - " destination_workload_namespace=\"production\",\n" + - " reporter=\"destination\",\n" + - "}[0s])) by (le))" -) - -func getCustomMetricsTask(t *testing.T, providerName string, providerURL string) *customMetricsTask { - // valid collect database task... 
should succeed - ct := &customMetricsTask{ - TaskMeta: TaskMeta{ - Task: StringPointer(CustomMetricsTaskName), - }, - With: customMetricsInputs{ - Templates: map[string]string{providerName: providerURL}, - }, - } - - httpmock.Activate() - t.Cleanup(httpmock.DeactivateAndReset) - httpmock.RegisterNoResponder(httpmock.InitialTransport.RoundTrip) - return ct -} - -// test getElapsedTimeSeconds() -func TestGetElapsedTimeSeconds(t *testing.T) { - _ = os.Chdir(t.TempDir()) - versionValues := map[string]interface{}{ - "startingTime": "2020-02-01T09:44:40Z", - } - - exp := &Experiment{ - Spec: []Task{}, - Result: &ExperimentResult{}, - } - - // this should add a startingTime that will be overwritten by the one in - // versionValues - exp.initResults(1) - - elapsedTimeSeconds, _ := getElapsedTimeSeconds(versionValues, exp) - - // elapsedTimeSeconds should be a large number - // - // if getElapsedTimeSeconds() used the starting time from the experiment instead of - // the one from versionValues, the elapsed time would be 0 or close to 0 - assert.Equal(t, elapsedTimeSeconds > 1000000, true) -} - -// test if a user sets startingTime incorrectly getElapsedTimeSeconds() -func TestStartingTimeFormatError(t *testing.T) { - _ = os.Chdir(t.TempDir()) - versionValues := map[string]interface{}{ - "startingTime": "1652935205", - } - - exp := &Experiment{ - Spec: []Task{}, - Result: &ExperimentResult{}, - } - - // this should add a startingTime that will be overwritten by the one in - // versionValues - exp.initResults(1) - _, err := getElapsedTimeSeconds(versionValues, exp) - assert.Error(t, err) -} - -// test istio-prom provider spec -func TestIstioProm(t *testing.T) { - dat, err := os.ReadFile(CompletePath("../testdata/custommetrics", "istio-prom.tpl")) - assert.NoError(t, err) - tplString := string(dat) - - _ = os.Chdir(t.TempDir()) - ct := getCustomMetricsTask(t, "istio-prom", istioPromProviderURL) - ct.With.Values = map[string]interface{}{"latencyPercentiles": []string{"90"}} - ct.With.VersionValues = []map[string]interface{}{{ - "labels": map[string]interface{}{ - "reporter": "destination", - "destination_workload": "myApp", - "destination_workload_namespace": "production", - }, - "elapsedTimeSeconds": "5", - }} - - // mock provider URL - httpmock.RegisterResponder("GET", istioPromProviderURL, - httpmock.NewStringResponder(200, tplString)) - - // mock Istio Prometheus server - httpmock.RegisterResponder("GET", "http://prometheus.istio-system:9090/api/v1/query", - func(req *http.Request) (*http.Response, error) { - queryParam := strings.TrimSpace(req.URL.Query().Get("query")) - - switch queryParam { - case istioPromRequestCount: - return httpmock.NewStringResponse(200, `{ - "status": "success", - "data": { - "resultType": "vector", - "result": [ - { - "metric": {}, - "value": [ - 1645602108.839, - "43" - ] - } - ] - } - }`), nil - - case istioPromErrorCount: - return httpmock.NewStringResponse(200, `{ - "status": "success", - "data": { - "resultType": "vector", - "result": [ - { - "metric": {}, - "value": [ - 1645602108.839, - "6" - ] - } - ] - } - }`), nil - - case istioPromErrorRate: - return httpmock.NewStringResponse(200, `{ - "status": "success", - "data": { - "resultType": "vector", - "result": [ - { - "metric": {}, - "value": [ - 1645602108.839, - "0.13953488372093023" - ] - } - ] - } - }`), nil - - case istioPromMeanLatency: - return httpmock.NewStringResponse(200, `{ - "status": "success", - "data": { - "resultType": "vector", - "result": [ - { - "metric": {}, - "value": [ - 1645602108.839, - 
"52" - ] - } - ] - } - }`), nil - - case istioPromLatencyP90: - return httpmock.NewStringResponse(200, `{ - "status": "success", - "data": { - "resultType": "vector", - "result": [ - { - "metric": {}, - "value": [ - 1945602108.839, - "64" - ] - } - ] - } - }`), nil - } - - return nil, errors.New("") - }) - - exp := &Experiment{ - Spec: []Task{ct}, - Result: &ExperimentResult{}, - } - exp.initResults(1) - _ = exp.Result.initInsightsWithNumVersions(1) - - err = ct.run(exp) - - // test should not fail - assert.NoError(t, err) - - // all three metrics should exist and have values - assert.Equal(t, exp.Result.Insights.NonHistMetricValues[0]["istio-prom/request-count"][0], float64(43)) - assert.Equal(t, exp.Result.Insights.NonHistMetricValues[0]["istio-prom/error-count"][0], float64(6)) - assert.Equal(t, exp.Result.Insights.NonHistMetricValues[0]["istio-prom/error-rate"][0], 0.13953488372093023) - assert.Equal(t, exp.Result.Insights.NonHistMetricValues[0]["istio-prom/latency-mean"][0], float64(52)) - assert.Equal(t, exp.Result.Insights.NonHistMetricValues[0]["istio-prom/latency-p90"][0], float64(64)) -} - -func TestNaN(t *testing.T) { - dat, err := os.ReadFile(CompletePath("../testdata/custommetrics", "nan.tpl")) - assert.NoError(t, err) - tplString := string(dat) - - _ = os.Chdir(t.TempDir()) - ct := getCustomMetricsTask(t, "nan", "http://url") - - // mock provider URL - httpmock.RegisterResponder("GET", "http://url", - httpmock.NewStringResponder(200, tplString)) - - // mock provider - httpmock.RegisterResponder("GET", "http://url/query", - func(req *http.Request) (*http.Response, error) { - queryParam := strings.TrimSpace(req.URL.Query().Get("query")) - t.Logf("queryParam = %s", queryParam) - - switch queryParam { - case "query-tonumber": - return httpmock.NewStringResponse(200, `{"value": "NaN"}`), nil - case "query-no-tonumber": - return httpmock.NewStringResponse(200, `{"value": "NaN"}`), nil - } - - return nil, errors.New("") - }) - - // experiment - exp := &Experiment{ - Spec: []Task{ct}, - Result: &ExperimentResult{}, - } - exp.initResults(1) - _ = exp.Result.initInsightsWithNumVersions(1) - - err = ct.run(exp) - - // task run should not fail - assert.NoError(t, err) - - // no metrics should be recorded - assert.NotContains(t, exp.Result.Insights.NonHistMetricValues[0], "nan/query-tonumber") - assert.NotContains(t, exp.Result.Insights.NonHistMetricValues[0], "nan/query-no-tonumber") -} - -// basic test with one version, mimicking Code Engine -// one version, three successful metrics -func TestCEOneVersion(t *testing.T) { - dat, err := os.ReadFile(CompletePath("../testdata/custommetrics", "test-ce.tpl")) - assert.NoError(t, err) - tplString := string(dat) - - _ = os.Chdir(t.TempDir()) - ct := getCustomMetricsTask(t, testCE, cePromProviderURL) - - // mock provider URL - httpmock.RegisterResponder("GET", istioPromProviderURL, - httpmock.NewStringResponder(200, tplString)) - - // request-count - httpmock.RegisterResponder("GET", testCEPromURL+queryString+url.QueryEscape(testCERequestCount), - httpmock.NewStringResponder(200, `{ - "status": "success", - "data": { - "resultType": "vector", - "result": [ - { - "metric": {}, - "value": [ - 1645602108.839, - "43" - ] - } - ] - } - }`)) - - // error-count - httpmock.RegisterResponder("GET", testCEPromURL+queryString+url.QueryEscape(testCEErrorCount), - httpmock.NewStringResponder(200, `{ - "status": "success", - "data": { - "resultType": "vector", - "result": [ - { - "metric": {}, - "value": [ - 1645648760.725, - "6" - ] - } - ] - } - }`)) - - // 
error-rate - httpmock.RegisterResponder("GET", testCEPromURL+queryString+url.QueryEscape(testCEErrorRate), - httpmock.NewStringResponder(200, `{ - "status": "success", - "data": { - "resultType": "vector", - "result": [ - { - "metric": {}, - "value": [ - 1645043851.825, - "0.13953488372093023" - ] - } - ] - } - }`)) - - exp := &Experiment{ - Spec: []Task{ct}, - Result: &ExperimentResult{}, - } - exp.initResults(1) - _ = exp.Result.initInsightsWithNumVersions(1) - - err = ct.run(exp) - - // test should not fail - assert.NoError(t, err) - - // all three metrics should exist and have values - assert.Equal(t, exp.Result.Insights.NonHistMetricValues[0]["test-ce/request-count"][0], float64(43)) - assert.Equal(t, exp.Result.Insights.NonHistMetricValues[0]["test-ce/error-count"][0], float64(6)) - assert.Equal(t, exp.Result.Insights.NonHistMetricValues[0]["test-ce/error-rate"][0], 0.13953488372093023) -} - -// basic test with versionValues, mimicking Code Engine -// one version, three successful metrics -func TestCEVersionValues(t *testing.T) { - dat, err := os.ReadFile(CompletePath("../testdata/custommetrics", "test-ce.tpl")) - assert.NoError(t, err) - tplString := string(dat) - - _ = os.Chdir(t.TempDir()) - ct := getCustomMetricsTask(t, testCE, cePromProviderURL) - - // mock provider URL - httpmock.RegisterResponder("GET", istioPromProviderURL, - httpmock.NewStringResponder(200, tplString)) - - ct.With.VersionValues = []map[string]interface{}{{ - "ibm_codeengine_revision_name": "v1", - }} - - // request-count - httpmock.RegisterResponder("GET", testCEPromURL+queryString+url.QueryEscape(testCERequestCountWithRevisionName), - httpmock.NewStringResponder(200, `{ - "status": "success", - "data": { - "resultType": "vector", - "result": [ - { - "metric": {}, - "value": [ - 1645602108.839, - "43" - ] - } - ] - } - }`)) - - // error-count - httpmock.RegisterResponder("GET", testCEPromURL+queryString+url.QueryEscape(testCEErrorCountWithRevisionName), - httpmock.NewStringResponder(200, `{ - "status": "success", - "data": { - "resultType": "vector", - "result": [ - { - "metric": {}, - "value": [ - 1645648760.725, - "6" - ] - } - ] - } - }`)) - - // error-rate - httpmock.RegisterResponder("GET", testCEPromURL+queryString+url.QueryEscape(testCEErrorRateWithRevisionName), - httpmock.NewStringResponder(200, `{ - "status": "success", - "data": { - "resultType": "vector", - "result": [ - { - "metric": {}, - "value": [ - 1645043851.825, - "0.13953488372093023" - ] - } - ] - } - }`)) - - exp := &Experiment{ - Spec: []Task{ct}, - Result: &ExperimentResult{}, - } - exp.initResults(1) - _ = exp.Result.initInsightsWithNumVersions(1) - - err = ct.run(exp) - - // test should not fail - assert.NoError(t, err) - - // all three metrics should exist and have values - assert.Equal(t, exp.Result.Insights.NonHistMetricValues[0]["test-ce/request-count"][0], float64(43)) - assert.Equal(t, exp.Result.Insights.NonHistMetricValues[0]["test-ce/error-count"][0], float64(6)) - assert.Equal(t, exp.Result.Insights.NonHistMetricValues[0]["test-ce/error-rate"][0], 0.13953488372093023) -} - -// test with one version and improper authorization, mimicking Code Engine -// one version, three successful metrics -func TestCEUnauthorized(t *testing.T) { - dat, err := os.ReadFile(CompletePath("../testdata/custommetrics", "test-ce.tpl")) - assert.NoError(t, err) - tplString := string(dat) - - _ = os.Chdir(t.TempDir()) - ct := getCustomMetricsTask(t, testCE, cePromProviderURL) - - // mock provider URL - httpmock.RegisterResponder("GET", 
istioPromProviderURL, - httpmock.NewStringResponder(200, tplString)) - - // request-count - httpmock.RegisterResponder("GET", testCEPromURL+queryString+url.QueryEscape(testCERequestCount), - httpmock.NewStringResponder(401, `Unauthorized`)) - - // error-count - httpmock.RegisterResponder("GET", testCEPromURL+queryString+url.QueryEscape(testCEErrorCount), - httpmock.NewStringResponder(401, `Unauthorized`)) - - // error-rate - httpmock.RegisterResponder("GET", testCEPromURL+queryString+url.QueryEscape(testCEErrorRate), - httpmock.NewStringResponder(401, `Unauthorized`)) - - exp := &Experiment{ - Spec: []Task{ct}, - Result: &ExperimentResult{}, - } - exp.initResults(1) - _ = exp.Result.initInsightsWithNumVersions(1) - - err = ct.run(exp) - - // test should not fail - assert.NoError(t, err) - - // no values should be collected because of unauthorized requests - assert.Equal(t, len(exp.Result.Insights.NonHistMetricValues[0]), 0) -} - -// test with one version with some values, mimicking Code Engine -// one version, three successful metrics, one without values -func TestCESomeValues(t *testing.T) { - dat, err := os.ReadFile(CompletePath("../testdata/custommetrics", "test-ce.tpl")) - assert.NoError(t, err) - tplString := string(dat) - - _ = os.Chdir(t.TempDir()) - ct := getCustomMetricsTask(t, testCE, cePromProviderURL) - - // mock provider URL - httpmock.RegisterResponder("GET", istioPromProviderURL, - httpmock.NewStringResponder(200, tplString)) - - // request-count - httpmock.RegisterResponder("GET", testCEPromURL+queryString+url.QueryEscape(testCERequestCount), httpmock.NewStringResponder(200, `{ - "status": "success", - "data": { - "resultType": "vector", - "result": [] - } - }`)) - - // error-count - httpmock.RegisterResponder("GET", testCEPromURL+queryString+url.QueryEscape(testCEErrorCount), - httpmock.NewStringResponder(200, `{ - "status": "success", - "data": { - "resultType": "vector", - "result": [ - { - "metric": {}, - "value": [ - 1645648760.725, - "6" - ] - } - ] - } - }`)) - - // error-rate - httpmock.RegisterResponder("GET", testCEPromURL+queryString+url.QueryEscape(testCEErrorRate), - httpmock.NewStringResponder(200, `{ - "status": "success", - "data": { - "resultType": "vector", - "result": [ - { - "metric": {}, - "value": [ - 1645043851.825, - "0.13953488372093023" - ] - } - ] - } - }`)) - - exp := &Experiment{ - Spec: []Task{ct}, - Result: &ExperimentResult{}, - } - exp.initResults(1) - _ = exp.Result.initInsightsWithNumVersions(1) - - err = ct.run(exp) - - // test should not fail - assert.NoError(t, err) - - // two metrics should exist and have values - assert.Equal(t, exp.Result.Insights.NonHistMetricValues[0]["test-ce/error-count"][0], float64(6)) - assert.Equal(t, exp.Result.Insights.NonHistMetricValues[0]["test-ce/error-rate"][0], 0.13953488372093023) - - // request-count should not exist because there was no value from response - _, ok := exp.Result.Insights.NonHistMetricValues[0]["test-ce/request-count"] - assert.Equal(t, ok, false) -} - -// test with two version with some values, mimicking Code Engine -// two versions, four successful metrics, two without values -func TestCEMultipleVersions(t *testing.T) { - dat, err := os.ReadFile(CompletePath("../testdata/custommetrics", "test-ce.tpl")) - assert.NoError(t, err) - tplString := string(dat) - - _ = os.Chdir(t.TempDir()) - ct := getCustomMetricsTask(t, testCE, cePromProviderURL) - - ct.With.VersionValues = []map[string]interface{}{{}, {}} - - // mock provider URL - httpmock.RegisterResponder("GET", istioPromProviderURL, 
- httpmock.NewStringResponder(200, tplString)) - - // request-count - httpmock.RegisterResponder("GET", testCEPromURL+queryString+url.QueryEscape(testCERequestCount), httpmock.NewStringResponder(200, `{ - "status": "success", - "data": { - "resultType": "vector", - "result": [] - } - }`)) - - // error-count - httpmock.RegisterResponder("GET", testCEPromURL+queryString+url.QueryEscape(testCEErrorCount), - httpmock.NewStringResponder(200, `{ - "status": "success", - "data": { - "resultType": "vector", - "result": [ - { - "metric": {}, - "value": [ - 1645648760.725, - "6" - ] - } - ] - } - }`)) - - // error-rate - httpmock.RegisterResponder("GET", testCEPromURL+queryString+url.QueryEscape(testCEErrorRate), - httpmock.NewStringResponder(200, `{ - "status": "success", - "data": { - "resultType": "vector", - "result": [ - { - "metric": {}, - "value": [ - 1645043851.825, - "0.13953488372093023" - ] - } - ] - } - }`)) - - exp := &Experiment{ - Spec: []Task{ct}, - Result: &ExperimentResult{}, - } - exp.initResults(1) - _ = exp.Result.initInsightsWithNumVersions(2) - - err = ct.run(exp) - - // test should not fail - assert.NoError(t, err) - - // two metrics should exist and have values - assert.Equal(t, exp.Result.Insights.NonHistMetricValues[0]["test-ce/error-count"][0], float64(6)) - assert.Equal(t, exp.Result.Insights.NonHistMetricValues[1]["test-ce/error-count"][0], float64(6)) - assert.Equal(t, exp.Result.Insights.NonHistMetricValues[0]["test-ce/error-rate"][0], 0.13953488372093023) - assert.Equal(t, exp.Result.Insights.NonHistMetricValues[1]["test-ce/error-rate"][0], 0.13953488372093023) - - // request-count should not exist because there was no value from response - _, ok := exp.Result.Insights.NonHistMetricValues[0]["test-ce/request-count"] - assert.Equal(t, ok, false) -} - -// test with two version with some values, mimicking Code Engine -// two versions, four successful metrics, two without values -func TestCEMultipleVersionsAndMetrics(t *testing.T) { - dat, err := os.ReadFile(CompletePath("../testdata/custommetrics", "test-ce.tpl")) - assert.NoError(t, err) - tplString := string(dat) - - _ = os.Chdir(t.TempDir()) - ct := getCustomMetricsTask(t, testCE, cePromProviderURL) - - ct.With.VersionValues = []map[string]interface{}{{}, {}} - - // mock provider URL - httpmock.RegisterResponder("GET", istioPromProviderURL, - httpmock.NewStringResponder(200, tplString)) - - // request-count - httpmock.RegisterResponder("GET", testCEPromURL+queryString+url.QueryEscape(testCERequestCount), httpmock.NewStringResponder(200, `{ - "status": "success", - "data": { - "resultType": "vector", - "result": [] - } - }`)) - - // error-count - httpmock.RegisterResponder("GET", testCEPromURL+queryString+url.QueryEscape(testCEErrorCount), - httpmock.NewStringResponder(200, `{ - "status": "success", - "data": { - "resultType": "vector", - "result": [ - { - "metric": {}, - "value": [ - 1645648760.725, - "6" - ] - } - ] - } - }`)) - - // error-rate - httpmock.RegisterResponder("GET", testCEPromURL+queryString+url.QueryEscape(testCEErrorRate), - httpmock.NewStringResponder(200, `{ - "status": "success", - "data": { - "resultType": "vector", - "result": [ - { - "metric": {}, - "value": [ - 1645043851.825, - "0.13953488372093023" - ] - } - ] - } - }`)) - - exp := &Experiment{ - Spec: []Task{ct}, - Result: &ExperimentResult{}, - } - exp.initResults(1) - _ = exp.Result.initInsightsWithNumVersions(2) - - err = ct.run(exp) - - // test should not fail - assert.NoError(t, err) - - // two metrics should exist and have values - 
assert.Equal(t, exp.Result.Insights.NonHistMetricValues[0]["test-ce/error-count"][0], float64(6)) - assert.Equal(t, exp.Result.Insights.NonHistMetricValues[1]["test-ce/error-count"][0], float64(6)) - assert.Equal(t, exp.Result.Insights.NonHistMetricValues[0]["test-ce/error-rate"][0], 0.13953488372093023) - assert.Equal(t, exp.Result.Insights.NonHistMetricValues[1]["test-ce/error-rate"][0], 0.13953488372093023) - - // request-count should not exist because there was no value from response - _, ok := exp.Result.Insights.NonHistMetricValues[0]["test-ce/request-count"] - assert.Equal(t, ok, false) -} - -// basic test with a request body -func TestRequestBody(t *testing.T) { - dat, err := os.ReadFile(CompletePath("../testdata/custommetrics", testRequestBody+".tpl")) - assert.NoError(t, err) - tplString := string(dat) - - _ = os.Chdir(t.TempDir()) - ct := getCustomMetricsTask(t, testRequestBody, testProviderURL) - - // mock provider URL - httpmock.RegisterResponder("GET", istioPromProviderURL, - httpmock.NewStringResponder(200, tplString)) - - // request-count - httpmock.RegisterResponder("GET", testCEPromURL+queryString+url.QueryEscape(exampleQueryParameter), - func(req *http.Request) (*http.Response, error) { - if req.Body != nil { - b, err := io.ReadAll(req.Body) - if err != nil { - panic(err) - } - - if string(b) == exampleRequestBody { - return httpmock.NewStringResponse(200, `{ - "status": "success", - "data": { - "resultType": "vector", - "result": [ - { - "metric": {}, - "value": [ - 1645602108.839, - "43" - ] - } - ] - } - }`), nil - } - } - - return nil, errors.New("") - }) - - exp := &Experiment{ - Spec: []Task{ct}, - Result: &ExperimentResult{}, - } - exp.initResults(1) - _ = exp.Result.initInsightsWithNumVersions(1) - - err = ct.run(exp) - - // test should not fail - assert.NoError(t, err) - - assert.Equal(t, exp.Result.Insights.NonHistMetricValues[0][testRequestBody+"/request-count"][0], float64(43)) -} diff --git a/base/experiment.go b/base/experiment.go index 92e4a4289..5105903df 100644 --- a/base/experiment.go +++ b/base/experiment.go @@ -4,16 +4,9 @@ import ( "encoding/json" "errors" "fmt" - "math" - "reflect" - "regexp" - "strconv" - "strings" "github.com/antonmedv/expr" log "github.com/iter8-tools/iter8/base/log" - "github.com/iter8-tools/iter8/base/summarymetrics" - "github.com/montanaflynn/stats" "helm.sh/helm/v3/pkg/time" ) @@ -33,8 +26,20 @@ type Task interface { // ExperimentSpec specifies the set of tasks in this experiment type ExperimentSpec []Task +// ExperimentMetadata species the name and namespace of the experiment +// Used in http and grpc tasks to send the name and namespace to the metrics server +type ExperimentMetadata struct { + // Name is the name of the experiment + Name string `json:"name" yaml:"name"` + + // Namespace is the namespace the experiment was deployed in + Namespace string `json:"namespace" yaml:"namespace"` +} + // Experiment struct containing spec and result type Experiment struct { + Metadata ExperimentMetadata `json:"metadata" yaml:"metadata"` + // Spec is the sequence of tasks that constitute this experiment Spec ExperimentSpec `json:"spec" yaml:"spec"` @@ -48,15 +53,18 @@ type Experiment struct { // ExperimentResult defines the current results from the experiment type ExperimentResult struct { + // Name is the name of this experiment + Name string `json:"name,omitempty" yaml:"name,omitempty"` + + // Namespace is the namespace of this experiment + Namespace string `json:"namespace,omitempty" yaml:"namespace,omitempty"` + // Revision of 
this experiment Revision int `json:"revision,omitempty" yaml:"revision,omitempty"` // StartTime is the time when the experiment run started StartTime time.Time `json:"startTime" yaml:"startTime"` - // NumLoops is the number of iterations this experiment has been running for - NumLoops int `json:"numLoops" yaml:"numLoops"` - // NumCompletedTasks is the number of completed tasks NumCompletedTasks int `json:"numCompletedTasks" yaml:"numCompletedTasks"` @@ -70,10 +78,7 @@ type ExperimentResult struct { Iter8Version string `json:"iter8Version" yaml:"iter8Version"` } -// Insights records the number of versions in this experiment, -// metric values and SLO indicators for each version, -// metrics metadata for all metrics, and -// SLO definitions for all SLOs +// Insights records the number of versions in this experiment type Insights struct { // NumVersions is the number of app versions detected by Iter8 NumVersions int `json:"numVersions" yaml:"numVersions"` @@ -81,48 +86,8 @@ type Insights struct { // VersionNames is list of version identifiers if known VersionNames []VersionInfo `json:"versionNames" yaml:"versionNames"` - // MetricsInfo identifies the metrics involved in this experiment - MetricsInfo map[string]MetricMeta `json:"metricsInfo,omitempty" yaml:"metricsInfo,omitempty"` - - // NonHistMetricValues: - // the outer slice must be the same length as the number of app versions - // the map key must match name of a metric in MetricsInfo - // the inner slice contains the list of all observed metric values for given version and given metric; float value [i]["foo/bar"][k] is the [k]th observation for version [i] for the metric bar under backend foo. - // this struct is meant exclusively for metrics of type other than histogram - NonHistMetricValues []map[string][]float64 `json:"nonHistMetricValues,omitempty" yaml:"nonHistMetricValues,omitempty"` - - // HistMetricValues: - // the outer slice must be the same length as the number of app versions - // the map key must match name of a histogram metric in MetricsInfo - // the inner slice contains the list of all observed histogram buckets for a given version and given metric; value [i]["foo/bar"][k] is the [k]th observed bucket for version [i] for the hist metric `bar` under backend `foo`. - HistMetricValues []map[string][]HistBucket `json:"histMetricValues,omitempty" yaml:"histMetricValues,omitempty"` - - // SummaryMetricValues: - // the outer slice must be the same length as the number of tracks - // the map key must match the name of the summary metric in MetricsInfo - SummaryMetricValues []map[string]summarymetrics.SummaryMetric - - // SLOs involved in this experiment - SLOs *SLOLimits `json:"SLOs,omitempty" yaml:"SLOs,omitempty"` - - // SLOsSatisfied indicator matrices that show if upper and lower SLO limits are satisfied - SLOsSatisfied *SLOResults `json:"SLOsSatisfied,omitempty" yaml:"SLOsSatisfied,omitempty"` - - // Rewards involed in this experiment - Rewards *Rewards `json:"rewards,omitempty" yaml:"rewards,omitempty"` - - // RewardsWinners indicate the winners - RewardsWinners *RewardsWinners `json:"rewardsWinners,omitempty" yaml:"rewardsWinners,omitempty"` -} - -// MetricMeta describes a metric -type MetricMeta struct { - // Description is a human readable description of the metric - Description string `json:"description" yaml:"description"` - // Units for this metric (if any) - Units *string `json:"units,omitempty" yaml:"units,omitempty"` - // Type of the metric. 
Example: counter - Type MetricType `json:"type" yaml:"type"` + // TaskData is a map of task names to the data produced by said task + TaskData map[string]interface{} `json:"taskData" yaml:"taskData"` } // VersionInfo is basic information about a version @@ -134,53 +99,6 @@ type VersionInfo struct { Track string `json:"track" yaml:"track"` } -// Rewards specify max and min rewards -type Rewards struct { - // Max is list of reward metrics where the version with the maximum value wins - Max []string `json:"max,omitempty" yaml:"max,omitempty"` - // Min is list of reward metrics where the version with the minimum value wins - Min []string `json:"min,omitempty" yaml:"min,omitempty"` -} - -// RewardsWinners are indices of the best versions for each reward metric -type RewardsWinners struct { - // Max rewards - // Max[i] specifies the index of the winner of reward metric Rewards.Max[i] - Max []int `json:"max,omitempty" yaml:"max,omitempty"` - // Min rewards - // Min[i] specifies the index of the winner of reward metric Rewards.Min[i] - Min []int `json:"min,omitempty" yaml:"min,omitempty"` -} - -// SLO is a service level objective -type SLO struct { - // Metric is the fully qualified metric name in the backendName/metricName format - Metric string `json:"metric" yaml:"metric"` - - // Limit is the acceptable limit for this metric - Limit float64 `json:"limit" yaml:"limit"` -} - -// SLOLimits specify upper or lower limits for metrics -type SLOLimits struct { - // Upper limits for metrics - Upper []SLO `json:"upper,omitempty" yaml:"upper,omitempty"` - - // Lower limits for metrics - Lower []SLO `json:"lower,omitempty" yaml:"lower,omitempty"` -} - -// SLOResults specify the results of SLO evaluations -type SLOResults struct { - // Upper limits for metrics - // Upper[i][j] specifies if upper SLO i is satisfied by version j - Upper [][]bool `json:"upper,omitempty" yaml:"upper,omitempty"` - - // Lower limits for metrics - // Lower[i][j] specifies if lower SLO i is satisfied by version j - Lower [][]bool `json:"lower,omitempty" yaml:"lower,omitempty"` -} - // TaskMeta provides common fields used across all tasks type TaskMeta struct { // Task is the name of the task @@ -190,7 +108,6 @@ type TaskMeta struct { Run *string `json:"run,omitempty" yaml:"run,omitempty"` // If is the condition used to determine if this task needs to run // If the condition is not satisfied, then it is skipped in an experiment - // Example: SLOs() If *string `json:"if,omitempty" yaml:"if,omitempty"` } @@ -243,14 +160,6 @@ func (s *ExperimentSpec) UnmarshalJSON(data []byte) error { return e } tsk = rt - case CustomMetricsTaskName: - cdt := &customMetricsTask{} - if err := json.Unmarshal(tBytes, cdt); err != nil { - e := errors.New("json unmarshal error") - log.Logger.WithStackTrace(err.Error()).Error(e) - return e - } - tsk = cdt case CollectHTTPTaskName: cht := &collectHTTPTask{} if err := json.Unmarshal(tBytes, cht); err != nil { @@ -267,14 +176,6 @@ func (s *ExperimentSpec) UnmarshalJSON(data []byte) error { return e } tsk = cgt - case AssessTaskName: - at := &assessTask{} - if err := json.Unmarshal(tBytes, at); err != nil { - e := errors.New("json unmarshal error") - log.Logger.WithStackTrace(err.Error()).Error(e) - return e - } - tsk = at case NotifyTaskName: nt := ¬ifyTask{} if err := json.Unmarshal(tBytes, nt); err != nil { @@ -296,130 +197,6 @@ func (s *ExperimentSpec) UnmarshalJSON(data []byte) error { return nil } -// metricTypeMatch checks if metric value is a match for its type -func metricTypeMatch(t MetricType, val 
interface{}) bool { - switch v := val.(type) { - case float64: - return t == CounterMetricType || t == GaugeMetricType - case []float64: - return t == SampleMetricType - case []HistBucket: - return t == HistogramMetricType - case *summarymetrics.SummaryMetric: - return t == SummaryMetricType - default: - log.Logger.Error("unsupported type for metric value: ", v) - return false - } -} - -// updateMetricValueScalar updates a scalar metric value for a given version -func (in *Insights) updateMetricValueScalar(m string, i int, val float64) { - in.NonHistMetricValues[i][m] = append(in.NonHistMetricValues[i][m], val) -} - -// updateMetricValueVector updates a vector metric value for a given version -func (in *Insights) updateMetricValueVector(m string, i int, val []float64) { - in.NonHistMetricValues[i][m] = append(in.NonHistMetricValues[i][m], val...) -} - -// updateMetricValueHist updates a histogram metric value for a given version -func (in *Insights) updateMetricValueHist(m string, i int, val []HistBucket) { - in.HistMetricValues[i][m] = append(in.HistMetricValues[i][m], val...) -} - -// updateSummaryMetric updates a summary metric value for a given version -func (in *Insights) updateSummaryMetric(m string, i int, val *summarymetrics.SummaryMetric) { - in.SummaryMetricValues[i][m] = *val -} - -// registerMetric registers a new metric by adding its meta data -func (in *Insights) registerMetric(m string, mm MetricMeta) error { - if old, ok := in.MetricsInfo[m]; ok && !reflect.DeepEqual(old, mm) { - err := fmt.Errorf("old and new metric meta for %v differ", m) - log.Logger.WithStackTrace(fmt.Sprintf("old: %v \nnew: %v", old, mm)).Error(err) - return err - } - in.MetricsInfo[m] = mm - return nil -} - -// updateMetric registers a metric and adds a metric value for a given version -// metric names will be normalized -func (in *Insights) updateMetric(m string, mm MetricMeta, i int, val interface{}) error { - var err error - if !metricTypeMatch(mm.Type, val) { - err = fmt.Errorf("metric value and type are incompatible; name: %v meta: %v version: %v value: %v", m, mm, i, val) - log.Logger.Error(err) - return err - } - - if in.NumVersions <= i { - err := fmt.Errorf("insufficient number of versions %v with version index %v", in.NumVersions, i) - log.Logger.Error(err) - return err - } - - nm, err := NormalizeMetricName(m) - if err != nil { - return err - } - - err = in.registerMetric(nm, mm) - if err != nil { - return err - } - - switch mm.Type { - case CounterMetricType, GaugeMetricType: - in.updateMetricValueScalar(nm, i, val.(float64)) - case SampleMetricType: - in.updateMetricValueVector(nm, i, val.([]float64)) - case HistogramMetricType: - in.updateMetricValueHist(nm, i, val.([]HistBucket)) - case SummaryMetricType: - in.updateSummaryMetric(nm, i, val.(*summarymetrics.SummaryMetric)) - default: - err := fmt.Errorf("unknown metric type %v", mm.Type) - log.Logger.Error(err) - } - return nil -} - -// setRewards sets the Rewards field in insights -// if this function is called multiple times (example, due to looping), then -// it is intended to be called with the same argument each time -func (in *Insights) setRewards(rewards *Rewards) error { - if in.SLOs != nil { - if reflect.DeepEqual(in.Rewards, rewards) { - return nil - } - e := fmt.Errorf("old and new value of rewards conflict") - log.Logger.WithStackTrace(fmt.Sprint("old: ", in.Rewards, "new: ", rewards)).Error(e) - return e - } - // LHS will be nil - in.Rewards = rewards - return nil -} - -// setSLOs sets the SLOs field in insights -// if this 
function is called multiple times (example, due to looping), then -// it is intended to be called with the same argument each time -func (in *Insights) setSLOs(slos *SLOLimits) error { - if in.SLOs != nil { - if reflect.DeepEqual(in.SLOs, slos) { - return nil - } - e := fmt.Errorf("old and new value of slos conflict") - log.Logger.WithStackTrace(fmt.Sprint("old: ", in.SLOs, "new: ", slos)).Error(e) - return e - } - // LHS will be nil - in.SLOs = slos - return nil -} - // TrackVersionStr creates a string of version name/track for display purposes func (in *Insights) TrackVersionStr(i int) string { // if VersionNames not defined or all fields empty return default "version i" @@ -442,35 +219,13 @@ func (in *Insights) TrackVersionStr(i int) string { return in.VersionNames[i].Track + " (" + in.VersionNames[i].Version + ")" } -// initializeSLOsSatisfied initializes the SLOs satisfied field -func (exp *Experiment) initializeSLOsSatisfied() error { - if exp.Result.Insights.SLOsSatisfied != nil { - return nil // already initialized - } - // LHS will be nil - exp.Result.Insights.SLOsSatisfied = &SLOResults{ - Upper: make([][]bool, 0), - Lower: make([][]bool, 0), - } - if exp.Result.Insights.SLOs != nil { - exp.Result.Insights.SLOsSatisfied.Upper = make([][]bool, len(exp.Result.Insights.SLOs.Upper)) - for i := 0; i < len(exp.Result.Insights.SLOs.Upper); i++ { - exp.Result.Insights.SLOsSatisfied.Upper[i] = make([]bool, exp.Result.Insights.NumVersions) - } - exp.Result.Insights.SLOsSatisfied.Lower = make([][]bool, len(exp.Result.Insights.SLOs.Lower)) - for i := 0; i < len(exp.Result.Insights.SLOs.Lower); i++ { - exp.Result.Insights.SLOsSatisfied.Lower[i] = make([]bool, exp.Result.Insights.NumVersions) - } - } - return nil -} - // initResults initializes the results section of an experiment func (exp *Experiment) initResults(revision int) { exp.Result = &ExperimentResult{ + Name: exp.Metadata.Name, + Namespace: exp.Metadata.Namespace, Revision: revision, StartTime: time.Now(), - NumLoops: 0, NumCompletedTasks: 0, Failure: false, Iter8Version: MajorMinor, @@ -481,305 +236,22 @@ func (exp *Experiment) initResults(revision int) { // insights data structure contains metrics data structures, so this will also // init metrics func (r *ExperimentResult) initInsightsWithNumVersions(n int) error { - if r.Insights != nil { - if r.Insights.NumVersions != n { - err := fmt.Errorf("inconsistent number for app versions; old (%v); new (%v)", r.Insights.NumVersions, n) - log.Logger.Error(err) - return err - } - } else { + if r.Insights == nil { r.Insights = &Insights{ NumVersions: n, + TaskData: map[string]interface{}{}, } - } - return r.Insights.initMetrics() -} - -// initMetrics initializes the data structes inside insights that will hold metrics -func (in *Insights) initMetrics() error { - if in.NonHistMetricValues != nil || - in.HistMetricValues != nil || - in.SummaryMetricValues != nil { - if len(in.NonHistMetricValues) != in.NumVersions || - len(in.HistMetricValues) != in.NumVersions || - len(in.SummaryMetricValues) != in.NumVersions { - err := fmt.Errorf("inconsistent number for app versions in non hist metric values (%v), hist metric values (%v), num versions (%v)", len(in.NonHistMetricValues), len(in.HistMetricValues), in.NumVersions) - log.Logger.Error(err) - return err - } - if len(in.NonHistMetricValues[0])+len(in.HistMetricValues[0])+len(in.SummaryMetricValues[0]) != len(in.MetricsInfo) { - err := fmt.Errorf("inconsistent number for metrics in non hist metric values (%v), hist metric values (%v), metrics 
info (%v)", len(in.NonHistMetricValues[0]), len(in.HistMetricValues[0]), len(in.MetricsInfo)) + } else { + if r.Insights.NumVersions != n { + err := fmt.Errorf("inconsistent number for app versions; old (%v); new (%v)", r.Insights.NumVersions, n) log.Logger.Error(err) return err } - return nil - } - // at this point, there are no known metrics, but there are in.NumVersions - // initialize metrics info - in.MetricsInfo = make(map[string]MetricMeta) - // initialize non hist metric values for each version - in.NonHistMetricValues = make([]map[string][]float64, in.NumVersions) - // initialize hist metric values for each version - in.HistMetricValues = make([]map[string][]HistBucket, in.NumVersions) - // initialize summary metric values for each version - in.SummaryMetricValues = make([]map[string]summarymetrics.SummaryMetric, in.NumVersions) - for i := 0; i < in.NumVersions; i++ { - in.NonHistMetricValues[i] = make(map[string][]float64) - in.HistMetricValues[i] = make(map[string][]HistBucket) - in.SummaryMetricValues[i] = make(map[string]summarymetrics.SummaryMetric) } - return nil -} -// getCounterOrGaugeMetricFromValuesMap gets the value of the given counter or gauge metric, for the given version, from metric values map -func (in *Insights) getCounterOrGaugeMetricFromValuesMap(i int, m string) *float64 { - if mm, ok := in.MetricsInfo[m]; ok { - log.Logger.Tracef("found metric info for %v", m) - if (mm.Type != CounterMetricType) && (mm.Type != GaugeMetricType) { - log.Logger.Errorf("metric %v is not of type counter or gauge", m) - return nil - } - l := len(in.NonHistMetricValues) - if l <= i { - log.Logger.Warnf("metric values not found for version %v; initialized for %v versions", i, l) - return nil - } - log.Logger.Tracef("metric values found for version %v", i) - // grab the metric value and return - if vals, ok := in.NonHistMetricValues[i][m]; ok { - log.Logger.Tracef("found metric value for version %v and metric %v", i, m) - if len(vals) > 0 { - return float64Pointer(vals[len(vals)-1]) - } - } - log.Logger.Infof("could not find metric value for version %v and metric %v", i, m) - } - log.Logger.Infof("could not find metric info for %v", m) return nil } -// getSampleAggregation aggregates the given base metric for the given version (i) with the given aggregation (a) -func (in *Insights) getSampleAggregation(i int, baseMetric string, a string) *float64 { - at := AggregationType(a) - vals := in.NonHistMetricValues[i][baseMetric] - if len(vals) == 0 { - log.Logger.Infof("metric %v for version %v has no sample", baseMetric, i) - return nil - } - if len(vals) == 1 { - log.Logger.Warnf("metric %v for version %v has sample of size 1", baseMetric, i) - return float64Pointer(vals[0]) - } - switch at { - case MeanAggregator: - agg, err := stats.Mean(vals) - if err == nil { - return float64Pointer(agg) - } - log.Logger.WithStackTrace(err.Error()).Errorf("aggregation error for version %v, metric %v, and aggregation func %v", i, baseMetric, a) - return nil - case StdDevAggregator: - agg, err := stats.StandardDeviation(vals) - if err == nil { - return float64Pointer(agg) - } - log.Logger.WithStackTrace(err.Error()).Errorf("aggregation error version %v, metric %v, and aggregation func %v", i, baseMetric, a) - return nil - case MinAggregator: - agg, err := stats.Min(vals) - if err == nil { - return float64Pointer(agg) - } - log.Logger.WithStackTrace(err.Error()).Errorf("aggregation error version %v, metric %v, and aggregation func %v", i, baseMetric, a) - return nil - case MaxAggregator: - agg, err := 
stats.Max(vals) - if err == nil { - return float64Pointer(agg) - } - log.Logger.WithStackTrace(err.Error()).Errorf("aggregation error version %v, metric %v, and aggregation func %v", i, baseMetric, a) - return nil - default: // don't do anything - } - - // at this point, 'a' must be a percentile aggregator - var percent float64 - var err error - if strings.HasPrefix(a, "p") { - b := strings.TrimPrefix(a, "p") - // b must be a percent - if match, _ := regexp.MatchString(decimalRegex, b); match { - // extract percent - if percent, err = strconv.ParseFloat(b, 64); err != nil { - log.Logger.WithStackTrace(err.Error()).Errorf("error extracting percent from aggregation func %v", a) - return nil - } - // compute percentile - agg, err := stats.Percentile(vals, percent) - if err == nil { - return float64Pointer(agg) - } - log.Logger.WithStackTrace(err.Error()).Errorf("aggregation error version %v, metric %v, and aggregation func %v", i, baseMetric, a) - return nil - } - log.Logger.Errorf("unable to extract percent from agggregation func %v", a) - return nil - } - log.Logger.Errorf("invalid aggregation %v", a) - return nil -} - -// getSummaryAggregation aggregates the given base metric for the given version (i) with the given aggregation (a) -func (in *Insights) getSummaryAggregation(i int, baseMetric string, a string) *float64 { - at := AggregationType(a) - m, ok := in.SummaryMetricValues[i][baseMetric] - if !ok { // metric not in list - log.Logger.Errorf("invalid metric %s", baseMetric) - return nil - } - - switch at { - case CountAggregator: - return float64Pointer(float64(m.Count())) - case MeanAggregator: - return float64Pointer(m.Sum() / float64(m.Count())) - case StdDevAggregator: - // sample variance (bessel's correction) - // ss / (count -1) - mean^2 * count / (count -1) - mean := m.Sum() / float64(m.Count()) - nMinus1 := float64(m.Count() - 1) - return float64Pointer(math.Sqrt((m.SumSquares() / nMinus1) - (mean*mean*float64(m.Count()))/nMinus1)) - case MinAggregator: - return float64Pointer(m.Min()) - case MaxAggregator: - return float64Pointer(m.Max()) - default: - // unknown, do nothing - } - log.Logger.Errorf("invalid aggregation %v", a) - return nil -} - -// aggregateMetric returns the aggregated metric value for a given version and metric -func (in *Insights) aggregateMetric(i int, m string) *float64 { - s := strings.Split(m, "/") - if len(s) != 3 { - // should not have been called - log.Logger.Errorf("metric name %v not valid for aggregation", m) - return nil - } - baseMetric := s[0] + "/" + s[1] - if m, ok := in.MetricsInfo[baseMetric]; ok { - log.Logger.Tracef("found metric %v used for aggregation", baseMetric) - if m.Type == SampleMetricType { - log.Logger.Tracef("metric %v used for aggregation is a sample metric", baseMetric) - return in.getSampleAggregation(i, baseMetric, s[2]) - } else if m.Type == SummaryMetricType { - log.Logger.Tracef("metric %v used for aggregation is a summary metric", baseMetric) - return in.getSummaryAggregation(i, baseMetric, s[2]) - } - log.Logger.Errorf("metric %v used for aggregation is not a sample or summary metric", baseMetric) - return nil - } - log.Logger.Warnf("could not find metric %v used for aggregation", baseMetric) - return nil -} - -// NormalizeMetricName normalizes percentile values in metric names -func NormalizeMetricName(m string) (string, error) { - preHTTP := httpMetricPrefix + "/" + builtInHTTPLatencyPercentilePrefix - preGRPC := gRPCMetricPrefix + "/" + gRPCLatencySampleMetricName + "/" + PercentileAggregatorPrefix - pre := "" - 
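[Editor's note] The removed `getSummaryAggregation` derives the sample standard deviation from a summary metric's running `(count, sum, sumSquares)` using Bessel's correction. A quick worked check of that formula, using the same numbers the removed tests use (count=10, sum=110, sumSquares=1540); this is a standalone sketch with a hypothetical helper name, not code from this repo:

```go
package main

import (
	"fmt"
	"math"
)

// stddevFromSummary mirrors the removed stddev branch of getSummaryAggregation:
// sample variance = sumSquares/(n-1) - mean^2 * n/(n-1)
//                 = (sumSquares - n*mean^2) / (n-1)
func stddevFromSummary(count, sum, sumSquares float64) float64 {
	mean := sum / count
	nMinus1 := count - 1
	return math.Sqrt(sumSquares/nMinus1 - mean*mean*count/nMinus1)
}

func main() {
	// mean = 110/10 = 11; variance = (1540 - 10*121)/9 = 330/9 ≈ 36.667
	fmt.Println(stddevFromSummary(10, 110, 1540)) // ≈ 6.0553007
}
```

That value sits between the 6.0553007 and 6.0553008 bounds asserted in the removed TestGetSummaryAggregation. By contrast, the sample aggregator delegates to stats.StandardDeviation, which for the same ten evenly spread values yields the population figure (sqrt(330/10) ≈ 5.7445626) matching the bounds in the removed TestGetSampleAggregation.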
if strings.HasPrefix(m, preHTTP) { // built-in http percentile metric - pre = preHTTP - } else if strings.HasPrefix(m, preGRPC) { // built-in gRPC percentile metric - pre = preGRPC - } - if len(pre) > 0 { - var percent float64 - var e error - remainder := strings.TrimPrefix(m, pre) - if percent, e = strconv.ParseFloat(remainder, 64); e != nil { - err := fmt.Errorf("cannot extract percent from metric %v", m) - log.Logger.WithStackTrace(e.Error()).Error(err) - return m, err - } - // return percent normalized metric name - return fmt.Sprintf("%v%v", pre, percent), nil - } - // already normalized - return m, nil -} - -// ScalarMetricValue gets the value of the given scalar metric for the given version -func (in *Insights) ScalarMetricValue(i int, m string) *float64 { - s := strings.Split(m, "/") - if len(s) == 3 { - log.Logger.Tracef("%v is an aggregated metric", m) - return in.aggregateMetric(i, m) - } else if len(s) == 2 { // this appears to be a non-aggregated metric - var nm string - var err error - if nm, err = NormalizeMetricName(m); err != nil { - return nil - } - return in.getCounterOrGaugeMetricFromValuesMap(i, nm) - } else { - log.Logger.Errorf("invalid metric name %v", m) - log.Logger.Error("metric names must be of the form a/b or a/b/c, where a is the id of the metrics backend, b is the id of a metric name, and c is a valid aggregation function") - return nil - } -} - -// GetMetricsInfo gets metric meta for the given normalized metric name -func (in *Insights) GetMetricsInfo(nm string) (*MetricMeta, error) { - s := strings.Split(nm, "/") - - // this is an aggregated metric - if len(s) == 3 { - log.Logger.Tracef("%v is an aggregated metric", nm) - vm := s[0] + "/" + s[1] - mm, ok := in.MetricsInfo[vm] - if !ok { - err := fmt.Errorf("unable to find info for vector metric: %v", vm) - log.Logger.Error(err) - return nil, err - } - // determine type of aggregation - aggType := CounterMetricType - if AggregationType(s[2]) != CountAggregator { - aggType = GaugeMetricType - } - // format aggregator text - formattedAggregator := s[2] + " value" - if strings.HasPrefix(s[2], PercentileAggregatorPrefix) { - percent := strings.TrimPrefix(s[2], PercentileAggregatorPrefix) - formattedAggregator = fmt.Sprintf("%v-th percentile value", percent) - } - // return metrics meta - return &MetricMeta{ - Description: fmt.Sprintf("%v of %v", formattedAggregator, vm), - Units: mm.Units, - Type: aggType, - }, nil - } - - // this is a non-aggregated metric - if len(s) == 2 { - mm, ok := in.MetricsInfo[nm] - if !ok { - err := fmt.Errorf("unable to find info for scalar metric: %v", nm) - log.Logger.Error(err) - return nil, err - } - return &mm, nil - } - - err := fmt.Errorf("invalid metric name %v; metric names must be of the form a/b or a/b/c, where a is the id of the metrics backend, b is the id of a metric name, and c is a valid aggregation function", nm) - log.Logger.Error(err) - return nil, err -} - // Driver enables interacting with experiment result stored externally type Driver interface { // Read the experiment @@ -809,92 +281,22 @@ func (exp *Experiment) NoFailure() bool { return exp != nil && exp.Result != nil && !exp.Result.Failure } -// getSLOsSatisfiedBy returns the set of versions which satisfy SLOs -func (exp *Experiment) getSLOsSatisfiedBy() []int { - if exp == nil { - log.Logger.Warning("nil experiment") - return nil - } - if exp.Result == nil { - log.Logger.Warning("nil experiment result") - return nil - } - if exp.Result.Insights == nil { - log.Logger.Warning("nil insights in experiment 
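[Editor's note] For reviewers tracing the removed lookup code: metric names came in two shapes, `backend/metric` for scalar lookups and `backend/metric/aggregator` for aggregations over sample or summary metrics, and `NormalizeMetricName` canonicalized percentile suffixes so that `p95.00` and `p95` named the same metric. A minimal standalone sketch of that normalization rule, with a hypothetical function name and inputs:

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// normalizePercentile re-renders a "p"-prefixed aggregator through
// ParseFloat, so trailing zeros collapse: "p95.00" -> "p95".
func normalizePercentile(agg string) (string, error) {
	percent, err := strconv.ParseFloat(strings.TrimPrefix(agg, "p"), 64)
	if err != nil {
		return agg, fmt.Errorf("cannot extract percent from %q", agg)
	}
	return fmt.Sprintf("p%v", percent), nil
}

func main() {
	fmt.Println(normalizePercentile("p95.00")) // p95 <nil>
	fmt.Println(normalizePercentile("p78.3"))  // p78.3 <nil>
}
```

In the removed code this normalization applied only to the built-in HTTP and gRPC latency-percentile prefixes; other names passed through unchanged.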
result") - return nil - } - if exp.Result.Insights.NumVersions == 0 { - log.Logger.Warning("experiment does not involve any versions") - return nil - } - if exp.Result.Insights.SLOs == nil { - log.Logger.Info("experiment does not involve any SLOs") - sat := []int{} - for j := 0; j < exp.Result.Insights.NumVersions; j++ { - sat = append(sat, j) - } - return sat - } - log.Logger.Debug("experiment involves at least one version and at least one SLO") - log.Logger.Trace(exp.Result.Insights.SLOs) - log.Logger.Trace(exp.Result.Insights.SLOsSatisfied) - log.Logger.Trace(exp.Result.Insights.NonHistMetricValues) - sat := []int{} - for j := 0; j < exp.Result.Insights.NumVersions; j++ { - satThis := true - for i := 0; i < len(exp.Result.Insights.SLOs.Upper); i++ { - satThis = satThis && exp.Result.Insights.SLOsSatisfied.Upper[i][j] - if !satThis { - break - } - } - for i := 0; i < len(exp.Result.Insights.SLOs.Lower); i++ { - satThis = satThis && exp.Result.Insights.SLOsSatisfied.Lower[i][j] - if !satThis { - break - } - } - if satThis { - sat = append(sat, j) - } - } - return sat -} - -// SLOs returns true if all versions satisfy SLOs -func (exp *Experiment) SLOs() bool { - if exp == nil || exp.Result == nil || exp.Result.Insights == nil { - log.Logger.Warning("experiment, or result, or insights is nil") - return false - } - sby := exp.getSLOsSatisfiedBy() - return exp.Result.Insights.NumVersions == len(sby) -} - // run the experiment func (exp *Experiment) run(driver Driver) error { var err error + exp.driver = driver if exp.Result == nil { err = errors.New("experiment with nil result section cannot be run") log.Logger.Error(err) return err } - log.Logger.Debug("exp result exists now ... ") - exp.incrementNumLoops() - log.Logger.Debugf("experiment loop %d started ...", exp.Result.NumLoops) - exp.resetNumCompletedTasks() - - err = driver.Write(exp) - if err != nil { - return err - } - log.Logger.Debugf("attempting to execute %v tasks", len(exp.Spec)) for i, t := range exp.Spec { log.Logger.Info("task " + fmt.Sprintf("%v: %v", i+1, *getName(t)) + ": started") + shouldRun := true // if task has a condition if cond := getIf(t); cond != nil { @@ -918,11 +320,11 @@ func (exp *Experiment) run(driver Driver) error { if err != nil { log.Logger.Error("task " + fmt.Sprintf("%v: %v", i+1, *getName(t)) + ": " + "failure") exp.failExperiment() - e := driver.Write(exp) - if e != nil { - return e + + err = driver.Write(exp) + if err != nil { + return err } - return err } log.Logger.Info("task " + fmt.Sprintf("%v: %v", i+1, *getName(t)) + ": " + "completed") } else { @@ -930,8 +332,8 @@ func (exp *Experiment) run(driver Driver) error { } exp.incrementNumCompletedTasks() - err = driver.Write(exp) + err = driver.Write(exp) if err != nil { return err } @@ -949,15 +351,6 @@ func (exp *Experiment) incrementNumCompletedTasks() { exp.Result.NumCompletedTasks++ } -func (exp *Experiment) resetNumCompletedTasks() { - exp.Result.NumCompletedTasks = 0 -} - -// incrementNumLoops increments the number of loops (experiment iterations) -func (exp *Experiment) incrementNumLoops() { - exp.Result.NumLoops++ -} - // getIf returns the condition (if any) which determine // whether of not if this task needs to run func getIf(t Task) *string { @@ -1000,14 +393,14 @@ func BuildExperiment(driver Driver) (*Experiment, error) { } // RunExperiment runs an experiment -func RunExperiment(reuseResult bool, driver Driver) error { +func RunExperiment(driver Driver) error { var exp *Experiment var err error if exp, err = BuildExperiment(driver); err 
!= nil { return err } - if !reuseResult { - exp.initResults(driver.GetRevision()) - } + + exp.initResults(driver.GetRevision()) + return exp.run(driver) } diff --git a/base/experiment_test.go b/base/experiment_test.go index 3239fd827..2c3b94d69 100644 --- a/base/experiment_test.go +++ b/base/experiment_test.go @@ -1,12 +1,14 @@ package base import ( + "encoding/json" "fmt" + "io" + "net/http" "os" "testing" "fortio.org/fortio/fhttp" - "github.com/iter8-tools/iter8/base/log" "github.com/stretchr/testify/assert" "sigs.k8s.io/yaml" ) @@ -19,25 +21,21 @@ func TestReadExperiment(t *testing.T) { e := &Experiment{} err = yaml.Unmarshal(b, e) assert.NoError(t, err) - assert.Equal(t, 4, len(e.Spec)) + assert.Equal(t, 1, len(e.Spec)) b, err = os.ReadFile(CompletePath("../testdata", "experiment_grpc.yaml")) assert.NoError(t, err) e = &Experiment{} err = yaml.Unmarshal(b, e) assert.NoError(t, err) - assert.Equal(t, 3, len(e.Spec)) - - b, err = os.ReadFile(CompletePath("../testdata", "experiment_db.yaml")) - assert.NoError(t, err) - e = &Experiment{} - err = yaml.Unmarshal(b, e) - assert.NoError(t, err) - assert.Equal(t, 4, len(e.Spec)) + assert.Equal(t, 1, len(e.Spec)) } func TestRunningTasks(t *testing.T) { - _ = os.Chdir(t.TempDir()) + // define METRICS_SERVER_URL + metricsServerURL := "http://iter8.default:8080" + err := os.Setenv(MetricsServerURL, metricsServerURL) + assert.NoError(t, err) // create and configure HTTP endpoint for testing mux, addr := fhttp.DynamicHTTPServer(false) @@ -45,6 +43,8 @@ func TestRunningTasks(t *testing.T) { var verifyHandlerCalled bool mux.HandleFunc("/get", GetTrackingHandler(&verifyHandlerCalled)) + _ = os.Chdir(t.TempDir()) + // valid collect task... should succeed ct := &collectHTTPTask{ TaskMeta: TaskMeta{ @@ -59,43 +59,27 @@ func TestRunningTasks(t *testing.T) { }, } - // valid assess task... 
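[Editor's note] With looping gone, `RunExperiment` always initializes a fresh result and runs the task list exactly once; the `reuseResult` flag disappears from the public entry point, and the updated tests assert that results flow to the metrics server instead of being reread from disk. A minimal end-to-end sketch of the new call shape, assuming the `FileDriver` from this PR and a placeholder server URL:

```go
package main

import (
	"os"

	"github.com/iter8-tools/iter8/base"
	"github.com/iter8-tools/iter8/base/log"
	"github.com/iter8-tools/iter8/driver"
)

func main() {
	// Write() now PUTs the result to the metrics server, so the
	// URL must be resolvable before the experiment runs
	_ = os.Setenv(base.MetricsServerURL, "http://iter8.default:8080")

	fd := &driver.FileDriver{RunDir: "."} // reads ./experiment.yaml
	// builds the experiment, calls initResults(revision), runs each task once
	if err := base.RunExperiment(fd); err != nil {
		log.Logger.Error(err)
		os.Exit(1)
	}
}
```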
should succeed - at := &assessTask{ - TaskMeta: TaskMeta{ - Task: StringPointer(AssessTaskName), - }, - With: assessInputs{ - SLOs: &SLOLimits{ - Upper: []SLO{{ - Metric: httpMetricPrefix + "/" + builtInHTTPErrorCountID, - Limit: 0, - }}, - }, - }, - } - exp := &Experiment{ - Spec: []Task{ct, at}, + Spec: []Task{ct}, Result: &ExperimentResult{}, + Metadata: ExperimentMetadata{ + Name: myName, + Namespace: myNamespace, + }, } exp.initResults(1) - err := ct.run(exp) + err = ct.run(exp) assert.NoError(t, err) assert.Equal(t, exp.Result.Insights.NumVersions, 1) // sanity check -- handler was called assert.True(t, verifyHandlerCalled) - - err = at.run(exp) - assert.NoError(t, err) - - // SLOs should be satisfied by app - for i := 0; i < len(exp.Result.Insights.SLOs.Upper); i++ { // i^th SLO - assert.True(t, exp.Result.Insights.SLOsSatisfied.Upper[i][0]) // satisfied by only version - } } func TestRunExperiment(t *testing.T) { - _ = os.Chdir(t.TempDir()) + // define METRICS_SERVER_URL + metricsServerURL := "http://iter8.default:8080" + err := os.Setenv(MetricsServerURL, metricsServerURL) + assert.NoError(t, err) // create and configure HTTP endpoint for testing mux, addr := fhttp.DynamicHTTPServer(false) @@ -103,6 +87,33 @@ func TestRunExperiment(t *testing.T) { var verifyHandlerCalled bool mux.HandleFunc("/get", GetTrackingHandler(&verifyHandlerCalled)) + // mock metrics server + StartHTTPMock(t) + metricsServerCalled := false + MockMetricsServer(MockMetricsServerInput{ + MetricsServerURL: metricsServerURL, + ExperimentResultCallback: func(req *http.Request) { + metricsServerCalled = true + + // check query parameters + assert.Equal(t, myName, req.URL.Query().Get("experiment")) + assert.Equal(t, myNamespace, req.URL.Query().Get("namespace")) + + // check payload + body, err := io.ReadAll(req.Body) + assert.NoError(t, err) + assert.NotNil(t, body) + + // check payload content + bodyExperimentResult := ExperimentResult{} + err = json.Unmarshal(body, &bodyExperimentResult) + assert.NoError(t, err) + assert.NotNil(t, body) + }, + }) + + _ = os.Chdir(t.TempDir()) + // create experiment.yaml CreateExperimentYaml(t, CompletePath("../testdata", "experiment.tpl"), url, "experiment.yaml") b, err := os.ReadFile("experiment.yaml") @@ -111,18 +122,16 @@ func TestRunExperiment(t *testing.T) { e := &Experiment{} err = yaml.Unmarshal(b, e) assert.NoError(t, err) - assert.Equal(t, 4, len(e.Spec)) + assert.Equal(t, 1, len(e.Spec)) - err = RunExperiment(false, &mockDriver{e}) + err = RunExperiment(&mockDriver{e}) assert.NoError(t, err) + assert.True(t, metricsServerCalled) // sanity check -- handler was called assert.True(t, verifyHandlerCalled) assert.True(t, e.Completed()) assert.True(t, e.NoFailure()) - expBytes, _ := yaml.Marshal(e) - log.Logger.Debug("\n" + string(expBytes)) - assert.True(t, e.SLOs()) } func TestFailExperiment(t *testing.T) { @@ -135,3 +144,30 @@ func TestFailExperiment(t *testing.T) { exp.failExperiment() assert.False(t, exp.NoFailure()) } + +func TestUnmarshalJSONError(t *testing.T) { + tests := []struct { + specBytes string + errMessage string + }{ + { + specBytes: "hello world", + errMessage: `invalid character 'h' looking for beginning of value`, + }, + { + specBytes: "[{}]", + errMessage: `invalid task found without a task name or a run command`, + }, + { + specBytes: `[{"task":"hello world"}]`, + errMessage: `unknown task: hello world`, + }, + } + + for _, test := range tests { + exp := ExperimentSpec{} + err := exp.UnmarshalJSON([]byte(test.specBytes)) + assert.Error(t, err) + 
assert.EqualError(t, err, test.errMessage) + } +} diff --git a/base/insights_test.go b/base/insights_test.go index 063e963bc..299b731c7 100644 --- a/base/insights_test.go +++ b/base/insights_test.go @@ -3,7 +3,6 @@ package base import ( "testing" - "github.com/iter8-tools/iter8/base/summarymetrics" "github.com/stretchr/testify/assert" ) @@ -25,101 +24,3 @@ func TestTrackVersionStr(t *testing.T) { }) } } - -func TestGetSummaryAggregation(t *testing.T) { - in := Insights{ - // count, sum, min, max, sumsquares - SummaryMetricValues: []map[string]summarymetrics.SummaryMetric{{ - "metric": [5]float64{float64(10), float64(110), float64(2), float64(20), float64(1540)}, - }}, - } - - assert.Equal(t, float64(10), *in.getSummaryAggregation(0, "metric", "count")) - assert.Equal(t, float64(11), *in.getSummaryAggregation(0, "metric", "mean")) - // assert.Equal(t, float64(6.055300708194983), *in.getSummaryAggregation(0, "metric", "stddev")) - assert.Greater(t, float64(6.0553008), *in.getSummaryAggregation(0, "metric", "stddev")) - assert.Less(t, float64(6.0553007), *in.getSummaryAggregation(0, "metric", "stddev")) - assert.Equal(t, float64(2), *in.getSummaryAggregation(0, "metric", "min")) - assert.Equal(t, float64(20), *in.getSummaryAggregation(0, "metric", "max")) - - assert.Nil(t, in.getSummaryAggregation(0, "metric", "invalid")) - - assert.Nil(t, in.getSummaryAggregation(0, "notametric", "count")) -} - -func TestGetSampleAggregation(t *testing.T) { - // no values - in := Insights{ - NonHistMetricValues: []map[string][]float64{{ - "metric": []float64{}, - }}, - } - assert.Nil(t, in.getSampleAggregation(0, "metric", "something")) - - // single value - in = Insights{ - NonHistMetricValues: []map[string][]float64{{ - "metric": []float64{float64(2)}, - }}, - } - assert.Equal(t, float64(2), *in.getSampleAggregation(0, "metric", "anything")) - - // multiple values - in = Insights{ - NonHistMetricValues: []map[string][]float64{{ - "metric": []float64{ - float64(2), float64(4), float64(6), float64(8), float64(10), - float64(12), float64(14), float64(16), float64(18), float64(20), - }, - }}, - } - assert.Len(t, in.NonHistMetricValues, 1) - assert.Len(t, in.NonHistMetricValues[0], 1) - assert.Contains(t, in.NonHistMetricValues[0], "metric") - assert.Equal(t, float64(11), *in.getSampleAggregation(0, "metric", "mean")) - // assert.Equal(t, float64(5.744562646538029), *in.getSampleAggregation(0, "metric", "stddev")) - assert.Greater(t, float64(5.7445627), *in.getSampleAggregation(0, "metric", "stddev")) - assert.Less(t, float64(5.7445626), *in.getSampleAggregation(0, "metric", "stddev")) - assert.Equal(t, float64(2), *in.getSampleAggregation(0, "metric", "min")) - assert.Equal(t, float64(20), *in.getSampleAggregation(0, "metric", "max")) - // starts with p but not a percentile - assert.Nil(t, in.getSampleAggregation(0, "metric", "p-notpercent")) - // invalid percentile (101) - assert.Nil(t, in.getSampleAggregation(0, "metric", "p101")) - assert.Equal(t, float64(15), *in.getSampleAggregation(0, "metric", "p78.3")) - // not a valid aggregation - assert.Nil(t, in.getSampleAggregation(0, "metric", "invalid")) -} - -func TestAggregateMetric(t *testing.T) { - in := Insights{ - MetricsInfo: map[string]MetricMeta{ - "prefix/summary": {Type: SummaryMetricType}, - "prefix/sample": {Type: SampleMetricType}, - "prefix/counter": {Type: CounterMetricType}, - "prefix/gauge": {Type: GaugeMetricType}, - }, - NonHistMetricValues: []map[string][]float64{{ - "prefix/sample": []float64{ - float64(2), float64(4), float64(6), 
float64(8), float64(10), - float64(12), float64(14), float64(16), float64(18), float64(20), - }, - }}, - // count, sum, min, max, sumsquares - SummaryMetricValues: []map[string]summarymetrics.SummaryMetric{{ - "prefix/summary": [5]float64{float64(10), float64(110), float64(2), float64(20), float64(1540)}, - }}, - } - - // not enough parts - assert.Nil(t, in.aggregateMetric(0, "counter")) - // not enough parts - assert.Nil(t, in.aggregateMetric(0, "prefix/counter")) - // not a summary or sample metric - assert.Nil(t, in.aggregateMetric(0, "prefix/counter/mean")) - // not in MetricsInfo - assert.Nil(t, in.aggregateMetric(0, "prefix/invalid/mean")) - - assert.Equal(t, float64(11), *in.aggregateMetric(0, "prefix/summary/mean")) - assert.Equal(t, float64(11), *in.aggregateMetric(0, "prefix/sample/mean")) -} diff --git a/base/metrics.go b/base/metrics.go index 136e59141..396ff5dcf 100644 --- a/base/metrics.go +++ b/base/metrics.go @@ -1,48 +1,88 @@ package base -// HistBucket is a single bucket in a histogram -type HistBucket struct { - // Lower endpoint of a histogram bucket - Lower float64 `json:"lower" yaml:"lower"` - // Upper endpoint of a histogram bucket - Upper float64 `json:"upper" yaml:"upper"` - // Count is the frequency count of the bucket - Count uint64 `json:"count" yaml:"count"` -} - -// MetricType identifies the type of the metric. -type MetricType string +import ( + "bytes" + "encoding/json" + "fmt" + "net/http" + "net/url" -// AggregationType identifies the type of the metric aggregator. -type AggregationType string + log "github.com/iter8-tools/iter8/base/log" +) const ( - // CounterMetricType corresponds to Prometheus Counter metric type - CounterMetricType MetricType = "Counter" - // GaugeMetricType corresponds to Prometheus Gauge metric type - GaugeMetricType MetricType = "Gauge" - // HistogramMetricType corresponds to a Histogram metric type - HistogramMetricType MetricType = "Histogram" - // SampleMetricType corresponds to a Sample metric type - SampleMetricType MetricType = "Sample" - // SummaryMetricType corresponds to a Summary metric type - SummaryMetricType MetricType = "Summary" - - // decimalRegex is the regex used to identify percentiles - decimalRegex = `^([\d]+(\.[\d]*)?|\.[\d]+)$` - - // CountAggregator corresponds to aggregation of type count - CountAggregator AggregationType = "count" - // MeanAggregator corresponds to aggregation of type mean - MeanAggregator AggregationType = "mean" - // StdDevAggregator corresponds to aggregation of type stddev - StdDevAggregator AggregationType = "stddev" - // MinAggregator corresponds to aggregation of type min - MinAggregator AggregationType = "min" - // MaxAggregator corresponds to aggregation of type max - MaxAggregator AggregationType = "max" - // PercentileAggregator corresponds to aggregation of type max - PercentileAggregator AggregationType = "percentile" - // PercentileAggregatorPrefix corresponds to prefix for percentiles - PercentileAggregatorPrefix = "p" + // MetricsServerURL is the URL of the metrics server + MetricsServerURL = "METRICS_SERVER_URL" + + // MetricsPath is the path to the GET /metrics endpoint + MetricsPath = "/metrics" + + // ExperimentResultPath is the path to the PUT /experimentResult endpoint + ExperimentResultPath = "/experimentResult" + // HTTPDashboardPath is the path to the GET /httpDashboard endpoint + HTTPDashboardPath = "/httpDashboard" + // GRPCDashboardPath is the path to the GET /grpcDashboard endpoint + GRPCDashboardPath = "/grpcDashboard" ) + +// callMetricsService is a general 
function that can be used to send data to the metrics service +func callMetricsService(method, metricsServerURL, path string, queryParams map[string]string, payload interface{}) error { + // handle URL and URL parameters + u, err := url.ParseRequestURI(metricsServerURL + path) + if err != nil { + return err + } + + params := url.Values{} + for paramKey, paramValue := range queryParams { + params.Add(paramKey, paramValue) + } + u.RawQuery = params.Encode() + urlStr := fmt.Sprintf("%v", u) + + log.Logger.Trace(fmt.Sprintf("call metrics service URL: %s", urlStr)) + + // handle payload + dataBytes, err := json.Marshal(payload) + if err != nil { + log.Logger.Error("cannot JSON marshal data for metrics server request: ", err) + return err + } + + // create request + req, err := http.NewRequest(method, urlStr, bytes.NewBuffer(dataBytes)) + if err != nil { + log.Logger.Error("cannot create new HTTP request metrics server: ", err) + return err + } + + req.Header.Set("Content-Type", "application/json") + + log.Logger.Trace("sending request") + + // send request + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + log.Logger.Error("could not send request to metrics server: ", err) + return err + } + defer func() { + err = resp.Body.Close() + if err != nil { + log.Logger.Error("could not close response body: ", err) + } + }() + + log.Logger.Trace("sent request") + + return nil +} + +// PutExperimentResultToMetricsService sends the experiment result to the metrics service +func PutExperimentResultToMetricsService(metricsServerURL, namespace, experiment string, experimentResult *ExperimentResult) error { + return callMetricsService(http.MethodPut, metricsServerURL, ExperimentResultPath, map[string]string{ + "namespace": namespace, + "experiment": experiment, + }, experimentResult) +} diff --git a/base/mock_qs_test.go b/base/mock_qs_test.go deleted file mode 100644 index a2ad326bd..000000000 --- a/base/mock_qs_test.go +++ /dev/null @@ -1,115 +0,0 @@ -package base - -import ( - "fmt" - "os" - "testing" - - "fortio.org/fortio/fhttp" - "github.com/stretchr/testify/assert" -) - -func TestMockQuickStartWithSLOs(t *testing.T) { - _ = os.Chdir(t.TempDir()) - mux, addr := fhttp.DynamicHTTPServer(false) - mux.HandleFunc("/echo1/", fhttp.EchoHandler) - testURL := fmt.Sprintf("http://localhost:%d/echo1/", addr.Port) - - // valid collect HTTP task... should succeed - ct := &collectHTTPTask{ - TaskMeta: TaskMeta{ - Task: StringPointer(CollectHTTPTaskName), - }, - With: collectHTTPInputs{ - endpoint: endpoint{ - Duration: StringPointer("2s"), - Headers: map[string]string{}, - URL: testURL, - }, - }, - } - - at := &assessTask{ - TaskMeta: TaskMeta{ - Task: StringPointer(AssessTaskName), - }, - With: assessInputs{ - SLOs: &SLOLimits{ - Upper: []SLO{{ - Metric: "http/latency-mean", - Limit: 100, - }}, - }, - }, - } - exp := &Experiment{ - Spec: []Task{ct, at}, - } - - exp.initResults(1) - _ = exp.Result.initInsightsWithNumVersions(1) - err := exp.Spec[0].run(exp) - assert.NoError(t, err) - err = exp.Spec[1].run(exp) - assert.NoError(t, err) - // assert SLOs are satisfied - for _, v := range exp.Result.Insights.SLOsSatisfied.Upper { - for _, b := range v { - assert.True(t, b) - } - } -} - -func TestMockQuickStartWithSLOsAndPercentiles(t *testing.T) { - _ = os.Chdir(t.TempDir()) - mux, addr := fhttp.DynamicHTTPServer(false) - mux.HandleFunc("/echo1/", fhttp.EchoHandler) - testURL := fmt.Sprintf("http://localhost:%d/echo1/", addr.Port) - - // valid collect HTTP task... 
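[Editor's note] The new client helper ultimately issues `PUT {metricsServerURL}/experimentResult?namespace=...&experiment=...` with the JSON-marshaled result as the request body. A direct-call sketch (URL and names are placeholders):

```go
package main

import (
	"github.com/iter8-tools/iter8/base"
	"github.com/iter8-tools/iter8/base/log"
)

func main() {
	result := &base.ExperimentResult{
		Name:      "my-experiment",
		Namespace: "default",
	}
	// PUT http://iter8.default:8080/experimentResult?namespace=default&experiment=my-experiment
	if err := base.PutExperimentResultToMetricsService(
		"http://iter8.default:8080", "default", "my-experiment", result,
	); err != nil {
		log.Logger.Error(err)
	}
}
```

Note that `callMetricsService` closes the response body but does not inspect the status code; the tests verify delivery through httpmock callbacks rather than through the return value.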
should succeed - ct := &collectHTTPTask{ - TaskMeta: TaskMeta{ - Task: StringPointer(CollectHTTPTaskName), - }, - With: collectHTTPInputs{ - endpoint: endpoint{ - Duration: StringPointer("1s"), - Headers: map[string]string{}, - URL: testURL, - }, - }, - } - - at := &assessTask{ - TaskMeta: TaskMeta{ - Task: StringPointer(AssessTaskName), - }, - With: assessInputs{ - SLOs: &SLOLimits{ - Upper: []SLO{{ - Metric: "http/latency-mean", - Limit: 100, - }, { - Metric: "http/latency-p95.00", - Limit: 200, - }}, - }, - }, - } - exp := &Experiment{ - Spec: []Task{ct, at}, - } - - exp.initResults(1) - _ = exp.Result.initInsightsWithNumVersions(1) - err := exp.Spec[0].run(exp) - assert.NoError(t, err) - err = exp.Spec[1].run(exp) - assert.NoError(t, err) - // assert SLOs are satisfied - for _, v := range exp.Result.Insights.SLOsSatisfied.Upper { - for _, b := range v { - assert.True(t, b) - } - } -} diff --git a/base/must_merge_overwrite.go b/base/must_merge_overwrite.go deleted file mode 100644 index 2ae73dc6a..000000000 --- a/base/must_merge_overwrite.go +++ /dev/null @@ -1,40 +0,0 @@ -/* -Copyright (C) 2013-2020 Masterminds - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -package base - -import ( - "github.com/imdario/mergo" - "github.com/iter8-tools/iter8/base/log" -) - -// mustMergeOverwrite merge maps giving precedence to the right side -func mustMergeOverwrite(dst map[string]interface{}, srcs ...map[string]interface{}) (interface{}, error) { - for _, src := range srcs { - if err := mergo.MergeWithOverwrite(&dst, src); err != nil { - // the following log line is the diff between the original sprig func and ours - log.Logger.Error(err) - return nil, err - } - } - return dst, nil -} diff --git a/base/notify.go b/base/notify.go index 234481094..af4a0949f 100644 --- a/base/notify.go +++ b/base/notify.go @@ -63,9 +63,6 @@ type Report struct { // NumCompletedTasks is the number of completed tasks in the experiment NumCompletedTasks int `json:"numCompletedTasks" yaml:"numCompletedTasks"` - // NumLoops is the current loop of the experiment - NumLoops int `json:"numLoops" yaml:"numLoops"` - // Experiment is the experiment struct Experiment *Experiment `json:"experiment" yaml:"experiment"` } @@ -79,7 +76,6 @@ func getReport(exp *Experiment) map[string]Report { NoTaskFailures: exp.NoFailure(), NumTasks: len(exp.Spec), NumCompletedTasks: exp.Result.NumCompletedTasks, - NumLoops: exp.Result.NumLoops, Experiment: exp, }, } @@ -110,14 +106,14 @@ func (t *notifyTask) getPayload(exp *Experiment) (string, error) { return "", nil } -// initializeDefaults sets default values for the custom metrics task +// initializeDefaults sets default values func (t *notifyTask) initializeDefaults() { // set default HTTP method if t.With.Method == "" { if t.With.PayloadTemplateURL != "" { - t.With.Method = "POST" + t.With.Method = http.MethodPost } else { - t.With.Method = "GET" + t.With.Method = http.MethodGet } } } diff --git a/base/notify_test.go b/base/notify_test.go index 62c5392a0..761bb9072 100644 --- a/base/notify_test.go +++ b/base/notify_test.go @@ -24,23 +24,21 @@ func getNotifyTask(t *testing.T, n notifyInputs) *notifyTask { }, With: n, } - - httpmock.Activate() - t.Cleanup(httpmock.DeactivateAndReset) - httpmock.RegisterNoResponder(httpmock.InitialTransport.RoundTrip) return nt } // GET method func TestNotify(t *testing.T) { _ = os.Chdir(t.TempDir()) + StartHTTPMock(t) + nt := getNotifyTask(t, notifyInputs{ URL: testNotifyURL, SoftFailure: false, }) // notify endpoint - httpmock.RegisterResponder("GET", testNotifyURL, + httpmock.RegisterResponder(http.MethodGet, testNotifyURL, httpmock.NewStringResponder(200, "success")) exp := &Experiment{ @@ -65,15 +63,17 @@ type testNotification struct { // POST method and PayloadTemplateURL func TestNotifyWithPayload(t *testing.T) { _ = os.Chdir(t.TempDir()) + StartHTTPMock(t) + nt := getNotifyTask(t, notifyInputs{ - Method: "POST", + Method: http.MethodPost, URL: testNotifyURL, PayloadTemplateURL: testNotifyURL + templatePath, SoftFailure: false, }) // payload template endpoint - httpmock.RegisterResponder("GET", testNotifyURL+templatePath, + httpmock.RegisterResponder(http.MethodGet, testNotifyURL+templatePath, httpmock.NewStringResponder(200, `{ "text": "hello world", "textReport": "{{ regexReplaceAll "\"" (regexReplaceAll "\n" (.Report | toPrettyJson) "\\n") "\\\""}}", @@ -82,7 +82,7 @@ func TestNotifyWithPayload(t *testing.T) { // notify endpoint httpmock.RegisterResponder( - "POST", + http.MethodPost, testNotifyURL, func(req *http.Request) (*http.Response, error) { buf := new(bytes.Buffer) @@ -133,6 +133,8 @@ func TestNotifyWithPayload(t *testing.T) { // GET method and headers and query parameters func 
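[Editor's note] `initializeDefaults` keeps its simple rule, POST when a payload template is supplied and GET otherwise, but now spells the methods with the `net/http` constants instead of bare strings. A standalone restatement of the rule, with a hypothetical helper name:

```go
package main

import (
	"fmt"
	"net/http"
)

// defaultMethod restates the notify task's defaulting rule:
// a payload template implies a request body, hence POST.
func defaultMethod(payloadTemplateURL string) string {
	if payloadTemplateURL != "" {
		return http.MethodPost
	}
	return http.MethodGet
}

func main() {
	fmt.Println(defaultMethod(""))                         // GET
	fmt.Println(defaultMethod("https://example.com/tmpl")) // POST
}
```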
TestNotifyWithHeadersAndQueryParams(t *testing.T) { _ = os.Chdir(t.TempDir()) + StartHTTPMock(t) + nt := getNotifyTask(t, notifyInputs{ URL: testNotifyURL, Headers: map[string]string{ @@ -146,7 +148,7 @@ func TestNotifyWithHeadersAndQueryParams(t *testing.T) { // notify endpoint httpmock.RegisterResponder( - "GET", + http.MethodGet, testNotifyURL, func(req *http.Request) (*http.Response, error) { // check headers @@ -175,6 +177,8 @@ func TestNotifyWithHeadersAndQueryParams(t *testing.T) { // bad method and SoftFailure func TestNotifyBadMethod(t *testing.T) { _ = os.Chdir(t.TempDir()) + StartHTTPMock(t) + nt := getNotifyTask(t, notifyInputs{ URL: testNotifyURL, Method: "abc", @@ -193,6 +197,8 @@ func TestNotifyBadMethod(t *testing.T) { // test should fail assert.Error(t, err) + StartHTTPMock(t) + nt = getNotifyTask(t, notifyInputs{ URL: testNotifyURL, Method: "abc", @@ -215,6 +221,8 @@ func TestNotifyBadMethod(t *testing.T) { // default to POST method with PayloadTemplateURL func TestNotifyPayloadTemplateURLDefaultMethod(t *testing.T) { _ = os.Chdir(t.TempDir()) + StartHTTPMock(t) + nt := getNotifyTask(t, notifyInputs{ URL: testNotifyURL, PayloadTemplateURL: testNotifyURL + templatePath, @@ -222,12 +230,12 @@ func TestNotifyPayloadTemplateURLDefaultMethod(t *testing.T) { }) // payload template endpoint - httpmock.RegisterResponder("GET", testNotifyURL+templatePath, + httpmock.RegisterResponder(http.MethodGet, testNotifyURL+templatePath, httpmock.NewStringResponder(200, `hello world`)) // notify endpoint httpmock.RegisterResponder( - "GET", + http.MethodGet, testNotifyURL, func(req *http.Request) (*http.Response, error) { assert.Fail(t, "notify task did not default to POST method with PayloadTemplateURL") @@ -238,7 +246,7 @@ func TestNotifyPayloadTemplateURLDefaultMethod(t *testing.T) { // notify endpoint httpmock.RegisterResponder( - "POST", + http.MethodPost, testNotifyURL, func(req *http.Request) (*http.Response, error) { return httpmock.NewStringResponse(200, "success"), nil @@ -261,6 +269,8 @@ func TestNotifyPayloadTemplateURLDefaultMethod(t *testing.T) { // No URL func TestNotifyNoURL(t *testing.T) { _ = os.Chdir(t.TempDir()) + StartHTTPMock(t) + nt := getNotifyTask(t, notifyInputs{ SoftFailure: false, }) diff --git a/base/test_helpers.go b/base/test_helpers.go index e82bb5c80..111fedcd6 100644 --- a/base/test_helpers.go +++ b/base/test_helpers.go @@ -2,12 +2,15 @@ package base import ( "bytes" + "fmt" "net/http" "os" "path/filepath" "testing" "time" + log "github.com/iter8-tools/iter8/base/log" + "github.com/jarcoal/httpmock" "github.com/stretchr/testify/assert" ) @@ -22,8 +25,21 @@ func (m *mockDriver) Read() (*Experiment, error) { } // Write an experiment -func (m *mockDriver) Write(e *Experiment) error { - m.Experiment = e +func (m *mockDriver) Write(exp *Experiment) error { + m.Experiment = exp + + // get URL of metrics server from environment variable + metricsServerURL, ok := os.LookupEnv(MetricsServerURL) + if !ok { + errorMessage := "could not look up METRICS_SERVER_URL environment variable" + log.Logger.Error(errorMessage) + return fmt.Errorf(errorMessage) + } + + err := PutExperimentResultToMetricsService(metricsServerURL, exp.Metadata.Namespace, exp.Metadata.Name, exp.Result) + if err != nil { + return err + } return nil } @@ -34,7 +50,6 @@ func (m *mockDriver) GetRevision() int { // CreateExperimentYaml creates an experiment.yaml file from a template and a URL func CreateExperimentYaml(t *testing.T, template string, url string, output string) { - values := struct { URL 
string }{ @@ -64,3 +79,68 @@ func GetTrackingHandler(breadcrumb *bool) func(w http.ResponseWriter, r *http.Re w.WriteHeader(200) } } + +// StartHTTPMock activates and cleanups httpmock +func StartHTTPMock(t *testing.T) { + httpmock.Activate() + t.Cleanup(httpmock.DeactivateAndReset) + httpmock.RegisterNoResponder(httpmock.InitialTransport.RoundTrip) +} + +// MetricsServerCallback is a callback function for when the particular metrics server endpoint +// is called +type MetricsServerCallback func(req *http.Request) + +// MockMetricsServerInput is the input for MockMetricsServer() +// allows the user to provide callbacks when particular endpoints are called +type MockMetricsServerInput struct { + MetricsServerURL string + + // PUT /experimentResult + ExperimentResultCallback MetricsServerCallback + // GET /grpcDashboard + GRPCDashboardCallback MetricsServerCallback + // GET /httpDashboard + HTTPDashboardCallback MetricsServerCallback +} + +// MockMetricsServer is a mock metrics server +// use the callback functions in the MockMetricsServerInput to test if those endpoints are called +func MockMetricsServer(input MockMetricsServerInput) { + // PUT /experimentResult + httpmock.RegisterResponder( + http.MethodPut, + input.MetricsServerURL+ExperimentResultPath, + func(req *http.Request) (*http.Response, error) { + if input.ExperimentResultCallback != nil { + input.ExperimentResultCallback(req) + } + return httpmock.NewStringResponse(200, "success"), nil + }, + ) + + // GET /httpDashboard + httpmock.RegisterResponder( + http.MethodGet, + input.MetricsServerURL+HTTPDashboardPath, + func(req *http.Request) (*http.Response, error) { + if input.HTTPDashboardCallback != nil { + input.HTTPDashboardCallback(req) + } + + return httpmock.NewStringResponse(200, "success"), nil + }, + ) + + // GET /grpcDashboard + httpmock.RegisterResponder( + http.MethodGet, + input.MetricsServerURL+GRPCDashboardPath, + func(req *http.Request) (*http.Response, error) { + if input.GRPCDashboardCallback != nil { + input.GRPCDashboardCallback(req) + } + return httpmock.NewStringResponse(200, "success"), nil + }, + ) +} diff --git a/base/util.go b/base/util.go index 1ff99e8b1..a93f8e0af 100644 --- a/base/util.go +++ b/base/util.go @@ -44,11 +44,6 @@ func float32Pointer(f float32) *float32 { return &f } -// float64Pointer takes an float64 as input, creates a new variable with the input value, and returns a pointer to the variable -func float64Pointer(f float64) *float64 { - return &f -} - // StringPointer takes string as input, creates a new variable with the input value, and returns a pointer to the variable func StringPointer(s string) *string { return &s diff --git a/base/util_test.go b/base/util_test.go index a40f7e480..c7c172ce1 100644 --- a/base/util_test.go +++ b/base/util_test.go @@ -78,3 +78,32 @@ func TestSplitApplication(t *testing.T) { assert.Equal(t, "default", ns) assert.Equal(t, "name", n) } + +type testType struct { + S string + I int + Nested struct { + S string + I int + } +} + +func TestToYAML(t *testing.T) { + obj := testType{ + S: "hello world", + I: 3, + Nested: struct { + S string + I int + }{ + S: "nested", + }, + } + + objString := ToYAML(obj) + assert.Equal(t, `I: 3 +Nested: + I: 0 + S: nested +S: hello world`, string(objString)) +} diff --git a/cmd/autox.go b/cmd/autox.go deleted file mode 100644 index d8e2dacdd..000000000 --- a/cmd/autox.go +++ /dev/null @@ -1,41 +0,0 @@ -package cmd - -import ( - "os" - "os/signal" - "syscall" - - "github.com/iter8-tools/iter8/autox" - "github.com/spf13/cobra" -) - 
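[Editor's note] `StartHTTPMock` plus `MockMetricsServer` form the shared scaffolding the updated tests lean on: activate httpmock tied to the test's cleanup, then register responders for the three metrics-server endpoints and observe calls through the input callbacks. A condensed usage sketch (the test name is hypothetical; everything else is the API added in this PR):

```go
package base

import (
	"net/http"
	"testing"

	"github.com/stretchr/testify/assert"
)

func TestResultReachesMetricsServer(t *testing.T) {
	const url = "http://iter8.default:8080"
	StartHTTPMock(t) // activated here, deactivated via t.Cleanup

	called := false
	MockMetricsServer(MockMetricsServerInput{
		MetricsServerURL: url,
		// fires when PUT /experimentResult is received
		ExperimentResultCallback: func(req *http.Request) {
			called = true
			assert.Equal(t, "default", req.URL.Query().Get("namespace"))
		},
	})

	err := PutExperimentResultToMetricsService(url, "default", "my-exp", &ExperimentResult{})
	assert.NoError(t, err)
	assert.True(t, called)
}
```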
-// autoxDesc is the description of autox cmd -const autoxDesc = ` -Run the Iter8 autoX controller. - iter8 autox -` - -// newAutoXCmd creates the autox command -func newAutoXCmd() *cobra.Command { - cmd := &cobra.Command{ - Use: "autox", - Short: "Start the Iter8 autoX controller", - Long: autoxDesc, - RunE: func(_ *cobra.Command, _ []string) error { - stopCh := make(chan struct{}) - defer close(stopCh) - - if err := autox.Start(stopCh, nil); err != nil { - return err - } - sigCh := make(chan os.Signal, 1) - signal.Notify(sigCh, syscall.SIGTERM, os.Interrupt) - <-sigCh - - return nil - }, - SilenceUsage: true, - Hidden: true, - } - return cmd -} diff --git a/cmd/controllers.go b/cmd/controllers.go index 134c67fc1..ffce71f5b 100644 --- a/cmd/controllers.go +++ b/cmd/controllers.go @@ -52,16 +52,16 @@ func newControllersCmd(stopCh <-chan struct{}, client k8sclient.Interface) *cobr var err error client, err = k8sclient.New(settings) if err != nil { - log.Logger.Error("could not obtain Kube client ... ") + log.Logger.Error("could not obtain Kube client... ") return err } } if err := controllers.Start(stopCh, client); err != nil { - log.Logger.Error("controllers did not start ... ") + log.Logger.Error("controllers did not start... ") return err } - log.Logger.Debug("started controllers ... ") + log.Logger.Debug("started controllers... ") // launch gRPC server to respond to frontend requests go func() { @@ -84,7 +84,7 @@ func newControllersCmd(stopCh <-chan struct{}, client k8sclient.Interface) *cobr sigCh := make(chan os.Signal, 1) signal.Notify(sigCh, syscall.SIGTERM, os.Interrupt) <-sigCh - log.Logger.Warn("SIGTERM ... ") + log.Logger.Warn("SIGTERM... ") } return nil diff --git a/cmd/controllers_test.go b/cmd/controllers_test.go index 969fcbbd5..085a2ec75 100644 --- a/cmd/controllers_test.go +++ b/cmd/controllers_test.go @@ -25,5 +25,4 @@ func TestControllers(t *testing.T) { cmd := newControllersCmd(ctx.Done(), kubeClient) err := cmd.RunE(cmd, nil) assert.NoError(t, err) - } diff --git a/cmd/docs_test.go b/cmd/docs_test.go index d6641a4ec..92260b687 100644 --- a/cmd/docs_test.go +++ b/cmd/docs_test.go @@ -9,7 +9,7 @@ import ( func TestDocs(t *testing.T) { _ = os.Chdir(t.TempDir()) tests := []cmdTestCase{ - // assert, SLOs + // assert { name: "create docs", cmd: fmt.Sprintf("docs --commandDocsDir %v", t.TempDir()), diff --git a/cmd/k.go b/cmd/k.go index 7a51a1a9b..63450eba7 100644 --- a/cmd/k.go +++ b/cmd/k.go @@ -40,9 +40,6 @@ func init() { os.Exit(1) } - // add k assert - kcmd.AddCommand(newKAssertCmd(kd)) - // add k delete kcmd.AddCommand(newKDeleteCmd(kd, os.Stdout)) @@ -52,10 +49,6 @@ func init() { // add k log kcmd.AddCommand(newKLogCmd(kd)) - // add k report - kcmd.AddCommand(newKReportCmd(kd)) - // add k run kcmd.AddCommand(newKRunCmd(kd, os.Stdout)) - } diff --git a/cmd/kassert.go b/cmd/kassert.go deleted file mode 100644 index 1d484ad7e..000000000 --- a/cmd/kassert.go +++ /dev/null @@ -1,71 +0,0 @@ -package cmd - -import ( - "errors" - "fmt" - "time" - - ia "github.com/iter8-tools/iter8/action" - "github.com/iter8-tools/iter8/base/log" - "github.com/iter8-tools/iter8/driver" - "github.com/spf13/cobra" -) - -// kassertDesc is the description of the k assert cmd -const kassertDesc = ` -Assert if the result of a Kubernetes experiment satisfies the specified conditions. If all conditions are satisfied, the command exits with code 0. Else, the command exits with code 1. - -Assertions are especially useful for automation inside CI/CD/GitOps pipelines. 
- -Supported conditions are 'completed', 'nofailure', 'slos', which indicate that the experiment has completed, none of the tasks have failed, and the SLOs are satisfied. - - iter8 k assert -c completed -c nofailure -c slos - # same as iter8 k assert -c completed,nofailure,slos - -You can optionally specify a timeout, which is the maximum amount of time to wait for the conditions to be satisfied: - - iter8 k assert -c completed,nofailure,slos -t 5s -` - -// newAssertCmd creates the Kubernetes assert command -func newKAssertCmd(kd *driver.KubeDriver) *cobra.Command { - actor := ia.NewAssertOpts(kd) - - cmd := &cobra.Command{ - Use: "assert", - Short: "Assert if Kubernetes experiment result satisfies conditions", - Long: kassertDesc, - SilenceUsage: true, - RunE: func(_ *cobra.Command, _ []string) error { - allGood, err := actor.KubeRun() - if err != nil { - return err - } - if !allGood { - e := errors.New("assert conditions failed") - log.Logger.Error(e) - return e - } - return nil - }, - } - // options specific to k assert - addExperimentGroupFlag(cmd, &actor.Group) - actor.EnvSettings = settings - - // options shared with assert - addConditionFlag(cmd, &actor.Conditions) - addTimeoutFlag(cmd, &actor.Timeout) - return cmd -} - -// addConditionFlag adds the condition flag to command -func addConditionFlag(cmd *cobra.Command, conditionPtr *[]string) { - cmd.Flags().StringSliceVarP(conditionPtr, "condition", "c", nil, fmt.Sprintf("%v | %v | %v; can specify multiple or separate conditions with commas;", ia.Completed, ia.NoFailure, ia.SLOs)) - _ = cmd.MarkFlagRequired("condition") -} - -// addTimeoutFlag adds timeout flag to command -func addTimeoutFlag(cmd *cobra.Command, timeoutPtr *time.Duration) { - cmd.Flags().DurationVar(timeoutPtr, "timeout", 0, "timeout duration (e.g., 5s)") -} diff --git a/cmd/kassert_test.go b/cmd/kassert_test.go deleted file mode 100644 index c2dc5e649..000000000 --- a/cmd/kassert_test.go +++ /dev/null @@ -1,98 +0,0 @@ -package cmd - -import ( - "context" - "fmt" - "os" - "path/filepath" - "testing" - - "fortio.org/fortio/fhttp" - "github.com/iter8-tools/iter8/base" - id "github.com/iter8-tools/iter8/driver" - "github.com/stretchr/testify/assert" - batchv1 "k8s.io/api/batch/v1" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -func TestKAssert(t *testing.T) { - _ = os.Chdir(t.TempDir()) - - // create and configure HTTP endpoint for testing - mux, addr := fhttp.DynamicHTTPServer(false) - url := fmt.Sprintf("http://127.0.0.1:%d/get", addr.Port) - var verifyHandlerCalled bool - mux.HandleFunc("/get", base.GetTrackingHandler(&verifyHandlerCalled)) - - // create experiment.yaml - base.CreateExperimentYaml(t, base.CompletePath("../testdata", "experiment.tpl"), url, id.ExperimentPath) - - // run test - testAssert(t, id.ExperimentPath, url, "output/kassert.txt", false) - // sanity check -- handler was called - assert.True(t, verifyHandlerCalled) -} - -func TestKAssertFailsSLOs(t *testing.T) { - _ = os.Chdir(t.TempDir()) - - // create and configure HTTP endpoint for testing - mux, addr := fhttp.DynamicHTTPServer(false) - url := fmt.Sprintf("http://127.0.0.1:%d/get", addr.Port) - var verifyHandlerCalled bool - mux.HandleFunc("/get", base.GetTrackingHandler(&verifyHandlerCalled)) - - // create experiment.yaml - base.CreateExperimentYaml(t, base.CompletePath("../testdata", "experiment_fails.tpl"), url, id.ExperimentPath) - - // run test - testAssert(t, id.ExperimentPath, url, "output/kassertfails.txt", true) - // sanity check -- handler was 
called - assert.True(t, verifyHandlerCalled) -} - -func testAssert(t *testing.T, experiment string, url string, expectedOutputFile string, expectError bool) { - tests := []cmdTestCase{ - // k launch - { - name: "k launch", - cmd: fmt.Sprintf("k launch -c %v --localChart --set tasks={http,assess} --set http.url=%s --set http.duration=2s", base.CompletePath("../charts", "iter8"), url), - golden: base.CompletePath("../testdata", "output/klaunch.txt"), - }, - // k run - { - name: "k run", - cmd: "k run -g default --namespace default", - }, - // k assert - { - name: "k assert", - cmd: "k assert -c completed -c nofailure -c slos", - golden: base.CompletePath("../testdata", expectedOutputFile), - wantError: expectError, - }, - } - - // fake kube cluster - *kd = *id.NewFakeKubeDriver(settings) - - // read experiment from file created by caller - byteArray, _ := os.ReadFile(filepath.Clean(experiment)) - _, _ = kd.Clientset.CoreV1().Secrets("default").Create(context.TODO(), &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default", - Namespace: "default", - }, - StringData: map[string]string{id.ExperimentPath: string(byteArray)}, - }, metav1.CreateOptions{}) - - _, _ = kd.Clientset.BatchV1().Jobs("default").Create(context.TODO(), &batchv1.Job{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default-1-job", - Namespace: "default", - }, - }, metav1.CreateOptions{}) - - runTestActionCmd(t, tests) -} diff --git a/cmd/klaunch.go b/cmd/klaunch.go index d6e22277e..8fe518957 100644 --- a/cmd/klaunch.go +++ b/cmd/klaunch.go @@ -14,19 +14,17 @@ import ( const klaunchDesc = ` Launch an experiment inside a Kubernetes cluster. - iter8 k launch --set "tasks={http}" --set http.url=https://httpbin.org/get \ - --set runner=job + iter8 k launch --set "tasks={http}" --set http.url=https://httpbin.org/get Use the dry option to simulate a Kubernetes experiment. This creates the manifest.yaml file, but does not run the experiment, and does not deploy any experiment resource objects in the cluster. iter8 k launch \ --set http.url=https://httpbin.org/get \ - --set runner=job \ --dry The launch command creates the 'charts' subdirectory under the current working directory, downloads the Iter8 experiment chart, and places it under 'charts'. This behavior can be controlled using various launch flags. -This command supports setting values using the same mechanisms as in Helm. Please see https://helm.sh/docs/chart_template_guide/values_files/ for more detailed descriptions. In particular, this command supports the --set, --set-file, --set-string, and -f (--values) options all of which have the same behavior as in Helm. +This command supports setting values using the same mechanisms as in Helm. Please see https://helm.sh/docs/chart_template_guide/values_files/ for more detailed descriptions. In particular, this command supports the --set, --set-file, --set-string, and -f (--values) options all of which have the same behavior as in Helm. 
` // newKLaunchCmd creates the Kubernetes launch command diff --git a/cmd/klog_test.go b/cmd/klog_test.go index c0b3e4d0b..1af5c8dac 100644 --- a/cmd/klog_test.go +++ b/cmd/klog_test.go @@ -10,11 +10,17 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" id "github.com/iter8-tools/iter8/driver" + "github.com/stretchr/testify/assert" "github.com/iter8-tools/iter8/base" ) func TestKLog(t *testing.T) { + // define METRICS_SERVER_URL + metricsServerURL := "http://iter8.default:8080" + err := os.Setenv(base.MetricsServerURL, metricsServerURL) + assert.NoError(t, err) + _ = os.Chdir(t.TempDir()) tests := []cmdTestCase{ // k launch diff --git a/cmd/kreport.go b/cmd/kreport.go deleted file mode 100644 index 06337e48b..000000000 --- a/cmd/kreport.go +++ /dev/null @@ -1,48 +0,0 @@ -package cmd - -import ( - ia "github.com/iter8-tools/iter8/action" - "github.com/iter8-tools/iter8/driver" - - "github.com/spf13/cobra" -) - -// kreportDesc is the description of the k report cmd -const kreportDesc = ` -Generate a text or HTML report of a Kubernetes experiment. - - iter8 k report - # same as iter8 k report -o text - -or - - iter8 k report -o html > report.html - # view with browser -` - -// newKReportCmd creates the Kubernetes report command -func newKReportCmd(kd *driver.KubeDriver) *cobra.Command { - actor := ia.NewReportOpts(kd) - - cmd := &cobra.Command{ - Use: "report", - Short: "Generate report for Kubernetes experiment", - Long: kreportDesc, - SilenceUsage: true, - RunE: func(_ *cobra.Command, _ []string) error { - return actor.KubeRun(outStream) - }, - } - // options specific to k report - addExperimentGroupFlag(cmd, &actor.Group) - actor.EnvSettings = settings - - // options shared with report - addOutputFormatFlag(cmd, &actor.OutputFormat) - return cmd -} - -// addOutputFormatFlag adds output format flag to the report command -func addOutputFormatFlag(cmd *cobra.Command, outputFormat *string) { - cmd.Flags().StringVarP(outputFormat, "outputFormat", "o", "text", "text | html") -} diff --git a/cmd/kreport_test.go b/cmd/kreport_test.go deleted file mode 100644 index 3fd86c8e8..000000000 --- a/cmd/kreport_test.go +++ /dev/null @@ -1,40 +0,0 @@ -package cmd - -import ( - "context" - "os" - "testing" - - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - id "github.com/iter8-tools/iter8/driver" - - "github.com/iter8-tools/iter8/base" -) - -func TestKReport(t *testing.T) { - _ = os.Chdir(t.TempDir()) - tests := []cmdTestCase{ - // k report - { - name: "k report", - cmd: "k report", - golden: base.CompletePath("../testdata", "output/kreport.txt"), - }, - } - - // mock the environment - // fake kube cluster - *kd = *id.NewFakeKubeDriver(settings) - byteArray, _ := os.ReadFile(base.CompletePath("../testdata/assertinputs", id.ExperimentPath)) - _, _ = kd.Clientset.CoreV1().Secrets("default").Create(context.TODO(), &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default", - Namespace: "default", - }, - StringData: map[string]string{id.ExperimentPath: string(byteArray)}, - }, metav1.CreateOptions{}) - - runTestActionCmd(t, tests) -} diff --git a/cmd/krun.go b/cmd/krun.go index 7443b5df4..2d713eb76 100644 --- a/cmd/krun.go +++ b/cmd/krun.go @@ -32,12 +32,5 @@ func newKRunCmd(kd *driver.KubeDriver, out io.Writer) *cobra.Command { }, } addExperimentGroupFlag(cmd, &actor.Group) - addReuseResult(cmd, &actor.ReuseResult) return cmd } - -// addReuseResult allows the experiment to reuse the experiment result for -// looping experiments -func addReuseResult(cmd 
*cobra.Command, reuseResultPtr *bool) { - cmd.Flags().BoolVar(reuseResultPtr, "reuseResult", false, "reuse experiment result; useful for experiments with multiple loops such as Kubernetes experiments with a cronjob runner") -} diff --git a/cmd/krun_test.go b/cmd/krun_test.go index 25668ee9c..6fb450b7a 100644 --- a/cmd/krun_test.go +++ b/cmd/krun_test.go @@ -2,7 +2,10 @@ package cmd import ( "context" + "encoding/json" "fmt" + "io" + "net/http" "os" "testing" @@ -14,8 +17,16 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +const ( + myName = "myName" + myNamespace = "myNamespace" +) + func TestKRun(t *testing.T) { - _ = os.Chdir(t.TempDir()) + // define METRICS_SERVER_URL + metricsServerURL := "http://iter8.default:8080" + err := os.Setenv(base.MetricsServerURL, metricsServerURL) + assert.NoError(t, err) // create and configure HTTP endpoint for testing mux, addr := fhttp.DynamicHTTPServer(false) @@ -23,6 +34,36 @@ func TestKRun(t *testing.T) { var verifyHandlerCalled bool mux.HandleFunc("/get", base.GetTrackingHandler(&verifyHandlerCalled)) + // mock metrics server + base.StartHTTPMock(t) + metricsServerCalled := false + base.MockMetricsServer(base.MockMetricsServerInput{ + MetricsServerURL: metricsServerURL, + ExperimentResultCallback: func(req *http.Request) { + metricsServerCalled = true + + // check query parameters + assert.Equal(t, myName, req.URL.Query().Get("experiment")) + assert.Equal(t, myNamespace, req.URL.Query().Get("namespace")) + + // check payload + body, err := io.ReadAll(req.Body) + assert.NoError(t, err) + assert.NotNil(t, body) + + // check payload content + bodyExperimentResult := base.ExperimentResult{} + + err = json.Unmarshal(body, &bodyExperimentResult) + assert.NoError(t, err) + assert.NotNil(t, body) + assert.Equal(t, myName, bodyExperimentResult.Name) + assert.Equal(t, myNamespace, bodyExperimentResult.Namespace) + }, + }) + + _ = os.Chdir(t.TempDir()) + // create experiment.yaml base.CreateExperimentYaml(t, base.CompletePath("../testdata", "experiment.tpl"), url, id.ExperimentPath) @@ -51,5 +92,5 @@ func TestKRun(t *testing.T) { runTestActionCmd(t, tests) // sanity check -- handler was called assert.True(t, verifyHandlerCalled) - + assert.True(t, metricsServerCalled) } diff --git a/cmd/root.go b/cmd/root.go index a73b983d9..e0f89fee6 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -1,9 +1,6 @@ package cmd import ( - "io" - "os" - "github.com/iter8-tools/iter8/controllers/k8sclient" "github.com/iter8-tools/iter8/driver" @@ -22,8 +19,6 @@ var ( settings = cli.New() // KubeDriver used by actions package kd = driver.NewKubeDriver(settings) - // output stream where log messages are printed - outStream io.Writer = os.Stdout // kubeclient is the client used for controllers package kubeClient k8sclient.Interface ) @@ -69,9 +64,6 @@ func init() { rootCmd.PersistentFlags().StringVarP(&logLevel, "loglevel", "l", "info", "trace, debug, info, warning, error, fatal, panic") rootCmd.SilenceErrors = true // will get printed in Execute() (by cobra.CheckErr()) - // add autox - rootCmd.AddCommand(newAutoXCmd()) - // add docs rootCmd.AddCommand(newDocsCmd()) diff --git a/cmd/test_helpers.go b/cmd/test_helpers.go index 41f4c0dfe..cb1810dee 100644 --- a/cmd/test_helpers.go +++ b/cmd/test_helpers.go @@ -58,12 +58,12 @@ func runTestActionCmd(t *testing.T, tests []cmdTestCase) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - store := storageFixture() _, out, err := executeActionCommandC(store, tt.cmd) if (err != nil) != tt.wantError { t.Errorf("want error = 
%v, got '%v'", tt.wantError, err) } + if tt.golden != "" { AssertGoldenString(t, out, tt.golden) } @@ -91,7 +91,6 @@ func executeActionCommandStdinC(store *storage.Storage, in *os.File, cmd string) rootCmd.SetErr(buf) rootCmd.SetArgs(args) log.Logger.Out = buf - outStream = buf oldStdin := os.Stdin if in != nil { @@ -205,7 +204,7 @@ func compare(actual []byte, filename string) error { } expected = normalize(expected) if !bytes.Equal(expected, actual) { - return errors.Errorf("does not match golden file %s WANT: '%s' GOT: '%s'", filename, expected, actual) + return errors.Errorf("does not match golden file %s WANT: '%s'\nGOT: '%s'", filename, expected, actual) } return nil } diff --git a/controllers/finalizer_test.go b/controllers/finalizer_test.go index 7f13d8e51..6ebfd28e0 100644 --- a/controllers/finalizer_test.go +++ b/controllers/finalizer_test.go @@ -14,7 +14,6 @@ import ( ) func TestAddFinalizer(t *testing.T) { - u := &unstructured.Unstructured{ Object: map[string]interface{}{ "apiVersion": "v1", diff --git a/controllers/interface_test.go b/controllers/interface_test.go new file mode 100644 index 000000000..841453849 --- /dev/null +++ b/controllers/interface_test.go @@ -0,0 +1,12 @@ +package controllers + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestGetAllRoutemaps(t *testing.T) { + rm := DefaultRoutemaps{} + assert.NotNil(t, rm.GetAllRoutemaps()) +} diff --git a/controllers/routemap_test.go b/controllers/routemap_test.go index 0a43260ee..97c061b27 100644 --- a/controllers/routemap_test.go +++ b/controllers/routemap_test.go @@ -414,3 +414,72 @@ routingTemplates: return assert.NoError(t, err) && assert.Equal(t, "11451027137128994800", signature) }, time.Second*2, time.Millisecond*100) } + +func TestGetNamespace(t *testing.T) { + a := "a" + b := "b" + c := "c" + myName := "myName" + myNamespace := "myNamespace" + + rm := routemap{ + ObjectMeta: metav1.ObjectMeta{ + Name: myName, + Namespace: myNamespace, + }, + Versions: []version{ + {Signature: &a}, + {Signature: &b}, + {Signature: &c}, + }, + } + + assert.Equal(t, myNamespace, rm.GetNamespace()) +} + +func TestGetName(t *testing.T) { + a := "a" + b := "b" + c := "c" + myName := "myName" + myNamespace := "myNamespace" + + rm := routemap{ + ObjectMeta: metav1.ObjectMeta{ + Name: myName, + Namespace: myNamespace, + }, + Versions: []version{ + {Signature: &a}, + {Signature: &b}, + {Signature: &c}, + }, + } + + assert.Equal(t, myName, rm.GetName()) +} + +func TestGetVersions(t *testing.T) { + a := "a" + b := "b" + c := "c" + + rm := routemap{ + Versions: []version{ + {Signature: &a}, + {Signature: &b}, + {Signature: &c}, + }, + } + + versions := rm.GetVersions() + assert.Equal(t, 3, len(versions)) +} + +func TestGetSignature(t *testing.T) { + a := "a" + + v := version{Signature: &a} + + assert.Equal(t, &a, v.GetSignature()) +} diff --git a/driver/filedriver.go b/driver/filedriver.go index 0ab011b20..b3e3d7161 100644 --- a/driver/filedriver.go +++ b/driver/filedriver.go @@ -2,12 +2,12 @@ package driver import ( "errors" + "fmt" "os" "path" "github.com/iter8-tools/iter8/base" "github.com/iter8-tools/iter8/base/log" - "sigs.k8s.io/yaml" ) // FileDriver enables reading and writing experiment spec and result files @@ -28,12 +28,22 @@ func (f *FileDriver) Read() (*base.Experiment, error) { // Write the experiment func (f *FileDriver) Write(exp *base.Experiment) error { - b, _ := yaml.Marshal(exp) - err := os.WriteFile(path.Join(f.RunDir, ExperimentPath), b, 0600) + // write to metrics server + // get URL of 
metrics server from environment variable
+	metricsServerURL, ok := os.LookupEnv(base.MetricsServerURL)
+	if !ok {
+		errorMessage := "could not look up METRICS_SERVER_URL environment variable"
+		log.Logger.Error(errorMessage)
+		return errors.New(errorMessage)
+	}
+
+	err := base.PutExperimentResultToMetricsService(metricsServerURL, exp.Metadata.Namespace, exp.Metadata.Name, exp.Result)
 	if err != nil {
-		log.Logger.WithStackTrace(err.Error()).Error("unable to write experiment")
-		return errors.New("unable to write experiment")
+		errorMessage := "could not write experiment result to metrics service"
+		log.Logger.Error(errorMessage)
+		return fmt.Errorf("%s: %w", errorMessage, err)
 	}
+
 	return nil
 }
diff --git a/driver/filedriver_test.go b/driver/filedriver_test.go
index ec57547ce..695a5ca16 100644
--- a/driver/filedriver_test.go
+++ b/driver/filedriver_test.go
@@ -1,7 +1,10 @@
 package driver
 
 import (
+	"encoding/json"
 	"fmt"
+	"io"
+	"net/http"
 	"os"
 	"testing"
 
@@ -10,8 +13,16 @@ import (
 	"github.com/stretchr/testify/assert"
 )
 
+const (
+	myName      = "myName"
+	myNamespace = "myNamespace"
+)
+
 func TestLocalRun(t *testing.T) {
-	_ = os.Chdir(t.TempDir())
+	// define METRICS_SERVER_URL
+	metricsServerURL := "http://iter8.default:8080"
+	err := os.Setenv(base.MetricsServerURL, metricsServerURL)
+	assert.NoError(t, err)
 
 	// create and configure HTTP endpoint for testing
 	mux, addr := fhttp.DynamicHTTPServer(false)
@@ -19,21 +30,47 @@ func TestLocalRun(t *testing.T) {
 	var verifyHandlerCalled bool
 	mux.HandleFunc("/get", base.GetTrackingHandler(&verifyHandlerCalled))
 
+	// mock metrics server
+	base.StartHTTPMock(t)
+	metricsServerCalled := false
+	base.MockMetricsServer(base.MockMetricsServerInput{
+		MetricsServerURL: metricsServerURL,
+		ExperimentResultCallback: func(req *http.Request) {
+			metricsServerCalled = true
+
+			// check query parameters
+			assert.Equal(t, myName, req.URL.Query().Get("experiment"))
+			assert.Equal(t, myNamespace, req.URL.Query().Get("namespace"))
+
+			// check payload
+			body, err := io.ReadAll(req.Body)
+			assert.NoError(t, err)
+			assert.NotNil(t, body)
+
+			// check payload content
+			bodyExperimentResult := base.ExperimentResult{}
+			err = json.Unmarshal(body, &bodyExperimentResult)
+			assert.NoError(t, err)
+			assert.NotNil(t, body)
+
+			// no experiment failure
+			assert.False(t, bodyExperimentResult.Failure)
+		},
+	})
+
+	_ = os.Chdir(t.TempDir())
+
 	// create experiment.yaml
 	base.CreateExperimentYaml(t, base.CompletePath("../testdata/drivertests", "experiment.tpl"), url, ExperimentPath)
 
 	fd := FileDriver{
 		RunDir: ".",
 	}
-	err := base.RunExperiment(false, &fd)
+	err = base.RunExperiment(&fd)
 	assert.NoError(t, err)
 
 	// sanity check -- handler was called
 	assert.True(t, verifyHandlerCalled)
-
-	// check results
-	exp, err := base.BuildExperiment(&fd)
-	assert.NoError(t, err)
-	assert.True(t, exp.Completed() && exp.NoFailure() && exp.SLOs())
+	assert.True(t, metricsServerCalled)
 }
 
 func TestFileDriverReadError(t *testing.T) {
diff --git a/driver/kubedriver.go b/driver/kubedriver.go
index c9f1bafb4..d0db248bc 100644
--- a/driver/kubedriver.go
+++ b/driver/kubedriver.go
@@ -31,7 +31,6 @@ import (
 	"helm.sh/helm/v3/pkg/getter"
 	"helm.sh/helm/v3/pkg/release"
 	"k8s.io/client-go/kubernetes"
-	"sigs.k8s.io/yaml"
 
 	corev1 "k8s.io/api/core/v1"
 	kerrors "k8s.io/apimachinery/pkg/api/errors"
@@ -210,51 +209,24 @@ func (kd *KubeDriver) Read() (*base.Experiment, error) {
 	return ExperimentFromBytes(b)
 }
 
-// formExperimentSecret creates the experiment secret using the experiment
-func (kd *KubeDriver) formExperimentSecret(e *base.Experiment) (*corev1.Secret, error) {
-	byteArray, err := yaml.Marshal(e)
-	if err != nil {
-		return nil, err
-	}
-	// log.Logger.Debug(string(byteArray))
-	sec := corev1.Secret{
-		ObjectMeta: metav1.ObjectMeta{
-			Name: kd.getExperimentSecretName(),
-			Annotations: map[string]string{
-				"iter8.tools/group": kd.Group,
-			},
-		},
-		StringData: map[string]string{ExperimentPath: string(byteArray)},
+// Write writes a Kubernetes experiment
+func (kd *KubeDriver) Write(exp *base.Experiment) error {
+	// write to metrics server
+	// get URL of metrics server from environment variable
+	metricsServerURL, ok := os.LookupEnv(base.MetricsServerURL)
+	if !ok {
+		errorMessage := "could not look up METRICS_SERVER_URL environment variable"
+		log.Logger.Error(errorMessage)
+		return errors.New(errorMessage)
 	}
-	// formed experiment secret ...
-	return &sec, nil
-}
 
-// updateExperimentSecret updates the experiment secret
-// as opposed to patch, update is an atomic operation
-func (kd *KubeDriver) updateExperimentSecret(e *base.Experiment) error {
-	if sec, err := kd.formExperimentSecret(e); err == nil {
-		secretsClient := kd.Clientset.CoreV1().Secrets(kd.Namespace())
-		_, err1 := secretsClient.Update(context.Background(), sec, metav1.UpdateOptions{})
-		// TODO: Evaluate if result secret update requires retries.
-		// Probably not. Conflicts will be avoided if cronjob avoids parallel jobs.
-		if err1 != nil {
-			err2 := fmt.Errorf("unable to update secret %v", sec.Name)
-			log.Logger.WithStackTrace(err1.Error()).Error(err2)
-			return err2
-		}
-	} else {
-		return err
+	err := base.PutExperimentResultToMetricsService(metricsServerURL, exp.Metadata.Namespace, exp.Metadata.Name, exp.Result)
+	if err != nil {
+		errorMessage := "could not write experiment result to metrics service"
+		log.Logger.Error(errorMessage)
+		return fmt.Errorf("%s: %w", errorMessage, err)
 	}
-	return nil
-}
 
-// Write writes a Kubernetes experiment
-func (kd *KubeDriver) Write(e *base.Experiment) error {
-	if err := kd.updateExperimentSecret(e); err != nil {
-		log.Logger.WithStackTrace(err.Error()).Error("unable to write experiment")
-		return errors.New("unable to write experiment")
-	}
+
 	return nil
 }
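Both drivers now persist results through base.PutExperimentResultToMetricsService instead of writing YAML locally or into a Kubernetes secret. The mock-server assertions in the tests above pin down the request shape: a PUT whose query string carries namespace and experiment, and whose body is a JSON-marshaled ExperimentResult. A minimal client-side sketch consistent with those assertions follows; it is illustrative only, and the /experimentResult path is an assumption (the real handler is registered under util.ExperimentResultPath in metrics/server.go):

package driver

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"

	"github.com/iter8-tools/iter8/base"
)

// putExperimentResultSketch illustrates the request that
// base.PutExperimentResultToMetricsService is expected to issue.
// It is not the actual implementation.
func putExperimentResultSketch(metricsServerURL, namespace, experiment string, result *base.ExperimentResult) error {
	// JSON payload, as asserted by the mock metrics server in the tests
	payload, err := json.Marshal(result)
	if err != nil {
		return err
	}

	// "/experimentResult" is an assumed path; see util.ExperimentResultPath
	u, err := url.Parse(metricsServerURL + "/experimentResult")
	if err != nil {
		return err
	}
	q := u.Query()
	q.Set("namespace", namespace)   // query parameter checked by the handler
	q.Set("experiment", experiment) // query parameter checked by the handler
	u.RawQuery = q.Encode()

	req, err := http.NewRequest(http.MethodPut, u.String(), bytes.NewBuffer(payload))
	if err != nil {
		return err
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer func() { _ = resp.Body.Close() }()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("metrics service returned %s", resp.Status)
	}
	return nil
}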
@@ -495,7 +467,7 @@ func (kd *KubeDriver) GetExperimentLogs() (string, error) {
 		req := podsClient.GetLogs(p.Name, &corev1.PodLogOptions{})
 		podLogs, err := req.Stream(context.TODO())
 		if err != nil {
-			e := errors.New("error in opening log stream")
+			e := fmt.Errorf("error opening log stream: %w", err)
 			log.Logger.Error(e)
 			return "", e
 		}
@@ -507,7 +479,7 @@ func (kd *KubeDriver) GetExperimentLogs() (string, error) {
 		buf := new(bytes.Buffer)
 		_, err = io.Copy(buf, podLogs)
 		if err != nil {
-			e := errors.New("error in copy information from podLogs to buf")
+			e := fmt.Errorf("error copying information from podLogs to buf: %w", err)
 			log.Logger.Error(e)
 			return "", e
 		}
diff --git a/driver/kubedriver_test.go b/driver/kubedriver_test.go
index 773290f05..93e1cecf4 100644
--- a/driver/kubedriver_test.go
+++ b/driver/kubedriver_test.go
@@ -2,7 +2,10 @@ package driver
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
+	"io"
+	"net/http"
 	"os"
 	"testing"
 
@@ -12,7 +15,6 @@ import (
 	"helm.sh/helm/v3/pkg/action"
 	"helm.sh/helm/v3/pkg/cli"
 	"helm.sh/helm/v3/pkg/cli/values"
-	batchv1 "k8s.io/api/batch/v1"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
@@ -28,7 +30,7 @@ func TestKOps(t *testing.T) {
 	// install
 	err = kd.install(action.ChartPathOptions{}, base.CompletePath("../", "charts/iter8"), values.Options{
-		Values: []string{"tasks={http}", 
"http.url=https://httpbin.org/get", "runner=job"}, + Values: []string{"tasks={http}", "http.url=https://httpbin.org/get"}, }, kd.Group, false) assert.NoError(t, err) @@ -43,7 +45,7 @@ func TestKOps(t *testing.T) { // upgrade err = kd.upgrade(action.ChartPathOptions{}, base.CompletePath("../", "charts/iter8"), values.Options{ - Values: []string{"tasks={http}", "http.url=https://httpbin.org/get", "runner=job"}, + Values: []string{"tasks={http}", "http.url=https://httpbin.org/get"}, }, kd.Group, false) assert.NoError(t, err) @@ -66,7 +68,10 @@ func TestKOps(t *testing.T) { } func TestKubeRun(t *testing.T) { - _ = os.Chdir(t.TempDir()) + // define METRICS_SERVER_URL + metricsServerURL := "http://iter8.default:8080" + err := os.Setenv(base.MetricsServerURL, metricsServerURL) + assert.NoError(t, err) // create and configure HTTP endpoint for testing mux, addr := fhttp.DynamicHTTPServer(false) @@ -74,6 +79,36 @@ func TestKubeRun(t *testing.T) { var verifyHandlerCalled bool mux.HandleFunc("/get", base.GetTrackingHandler(&verifyHandlerCalled)) + // mock metrics server + base.StartHTTPMock(t) + metricsServerCalled := false + base.MockMetricsServer(base.MockMetricsServerInput{ + MetricsServerURL: metricsServerURL, + ExperimentResultCallback: func(req *http.Request) { + metricsServerCalled = true + + // check query parameters + assert.Equal(t, myName, req.URL.Query().Get("experiment")) + assert.Equal(t, myNamespace, req.URL.Query().Get("namespace")) + + // check payload + body, err := io.ReadAll(req.Body) + assert.NoError(t, err) + assert.NotNil(t, body) + + // check payload content + bodyExperimentResult := base.ExperimentResult{} + err = json.Unmarshal(body, &bodyExperimentResult) + assert.NoError(t, err) + assert.NotNil(t, body) + + // no experiment failure + assert.False(t, bodyExperimentResult.Failure) + }, + }) + + _ = os.Chdir(t.TempDir()) + // create experiment.yaml base.CreateExperimentYaml(t, base.CompletePath("../testdata/drivertests", "experiment.tpl"), url, ExperimentPath) @@ -89,26 +124,11 @@ func TestKubeRun(t *testing.T) { StringData: map[string]string{ExperimentPath: string(byteArray)}, }, metav1.CreateOptions{}) - _, _ = kd.Clientset.BatchV1().Jobs("default").Create(context.TODO(), &batchv1.Job{ - ObjectMeta: metav1.ObjectMeta{ - Name: "default-1-job", - Namespace: "default", - Annotations: map[string]string{ - "iter8.tools/group": "default", - "iter8.tools/revision": "1", - }, - }, - }, metav1.CreateOptions{}) - - err := base.RunExperiment(false, kd) + err = base.RunExperiment(kd) assert.NoError(t, err) // sanity check -- handler was called assert.True(t, verifyHandlerCalled) - - // check results - exp, err := base.BuildExperiment(kd) - assert.NoError(t, err) - assert.True(t, exp.Completed() && exp.NoFailure() && exp.SLOs()) + assert.True(t, metricsServerCalled) } func TestLogs(t *testing.T) { diff --git a/go.mod b/go.mod index 45dc38f41..af256ecaa 100644 --- a/go.mod +++ b/go.mod @@ -18,7 +18,7 @@ retract ( ) require ( - fortio.org/fortio v1.54.0 + fortio.org/fortio v1.57.3 github.com/Masterminds/sprig v2.22.0+incompatible github.com/antonmedv/expr v1.12.5 github.com/bojand/ghz v0.114.0 @@ -40,7 +40,7 @@ require ( golang.org/x/sys v0.10.0 golang.org/x/text v0.11.0 gonum.org/v1/plot v0.13.0 - google.golang.org/grpc v1.56.1 + google.golang.org/grpc v1.56.2 google.golang.org/protobuf v1.31.0 helm.sh/helm/v3 v3.11.2 k8s.io/api v0.26.3 @@ -53,8 +53,8 @@ require ( cloud.google.com/go/compute v1.19.1 // indirect cloud.google.com/go/compute/metadata v0.2.3 // indirect fortio.org/dflag 
v1.5.2 // indirect - fortio.org/log v1.3.0 // indirect - fortio.org/sets v1.0.2 // indirect + fortio.org/log v1.7.0 // indirect + fortio.org/sets v1.0.3 // indirect fortio.org/version v1.0.2 // indirect git.sr.ht/~sbinet/gg v0.4.1 // indirect github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect diff --git a/go.sum b/go.sum index 78afe72e3..1f64ef671 100644 --- a/go.sum +++ b/go.sum @@ -41,15 +41,15 @@ cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohl cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs= cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= -fortio.org/assert v1.1.4 h1:Za1RaG+OjsTMpQS3J3UCvTF6wc4+IOHCz+jAOU37Y4o= +fortio.org/assert v1.2.0 h1:XscfvR8yp4xW7OMCvNbCsieRFDxlwdEcb69+JZRp6LA= fortio.org/dflag v1.5.2 h1:F9XVRj4Qr2IbJP7BMj7XZc9wB0Q/RZ61Ool+4YPVad8= fortio.org/dflag v1.5.2/go.mod h1:ppb/A8u+KKg+qUUYZNYuvRnXuVb8IsdHb/XGzsmjkN8= -fortio.org/fortio v1.54.0 h1:2jn8yTd6hcIEoKY4CjI0lI6XxTWVxsMYF2bMiWOmv+Y= -fortio.org/fortio v1.54.0/go.mod h1:SRaZbikL31UoAkw0On2hwpvHrQ0rRVnsAz3UGVNvMRw= -fortio.org/log v1.3.0 h1:bESPvuQGKejw7rrx41Sg3GoF+tsrB7oC08PxBs5/AM0= -fortio.org/log v1.3.0/go.mod h1:u/8/2lyczXq52aT5Nw6reD+3cR6m/EbS2jBiIYhgiTU= -fortio.org/sets v1.0.2 h1:gSWZFg9rgzl1zJfI/93lDJKBFw8WZ3Uxe3oQ5uDM4T4= -fortio.org/sets v1.0.2/go.mod h1:xVjulHr0FhlmReSymI+AhDtQ4FgjiazQ3JmuNpYFMs8= +fortio.org/fortio v1.57.3 h1:kdPlBiws3cFsLcssZxCt2opFmHj14C3yPBokFhMWzmg= +fortio.org/fortio v1.57.3/go.mod h1:ykSkArQICajFCvasfgrpE82Fc4sQ+f9Pm1dKIvducaA= +fortio.org/log v1.7.0 h1:4MbU81zqe/3RYuHpXADNgJwd2KEMAwmMUtuF5qtZTug= +fortio.org/log v1.7.0/go.mod h1:u/8/2lyczXq52aT5Nw6reD+3cR6m/EbS2jBiIYhgiTU= +fortio.org/sets v1.0.3 h1:HzewdGjH69YmyW06yzplL35lGr+X4OcqQt0qS6jbaO4= +fortio.org/sets v1.0.3/go.mod h1:QZVj0r6KP/ZD9ebySW9SgxVNy/NjghUfyHW9NN+WU+4= fortio.org/version v1.0.2 h1:8NwxdX58aoeKx7T5xAPO0xlUu1Hpk42nRz5s6e6eKZ0= fortio.org/version v1.0.2/go.mod h1:2JQp9Ax+tm6QKiGuzR5nJY63kFeANcgrZ0osoQFDVm0= git.sr.ht/~sbinet/cmpimg v0.1.0 h1:E0zPRk2muWuCqSKSVZIWsgtU9pjsw3eKHi8VmQeScxo= @@ -1131,8 +1131,8 @@ google.golang.org/grpc v1.35.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAG google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.36.1/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= -google.golang.org/grpc v1.56.1 h1:z0dNfjIl0VpaZ9iSVjA6daGatAYwPGstTjt5vkRMFkQ= -google.golang.org/grpc v1.56.1/go.mod h1:I9bI3vqKfayGqPUAwGdOSu7kt6oIJLixfffKrpXqQ9s= +google.golang.org/grpc v1.56.2 h1:fVRFRnXvU+x6C4IlHZewvJOVHoOv1TUuQyoRsYnB4bI= +google.golang.org/grpc v1.56.2/go.mod h1:I9bI3vqKfayGqPUAwGdOSu7kt6oIJLixfffKrpXqQ9s= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= diff --git a/grafana/grpc.json b/grafana/grpc.json index ee945c3ef..e6e393ded 100644 --- a/grafana/grpc.json +++ b/grafana/grpc.json @@ -139,7 +139,7 @@ }, "fields": [ { - "jsonPath": "$.Summary.Failure" + "jsonPath": "$.ExperimentResult.Failure" } ], 
"method": "GET", @@ -209,7 +209,7 @@ }, "fields": [ { - "jsonPath": "$.Summary.Completed tasks" + "jsonPath": "$.ExperimentResult.Completed tasks" } ], "method": "GET", @@ -294,7 +294,7 @@ }, "fields": [ { - "jsonPath": "$.Summary" + "jsonPath": "$.ExperimentResult" } ], "method": "GET", @@ -309,7 +309,7 @@ "id": "extractFields", "options": { "replace": true, - "source": "Summary" + "source": "ExperimentResult" } }, { diff --git a/grafana/http.json b/grafana/http.json index adcab66a6..9975b9ad0 100644 --- a/grafana/http.json +++ b/grafana/http.json @@ -145,7 +145,7 @@ }, "fields": [ { - "jsonPath": "$.Summary.Failure" + "jsonPath": "$.ExperimentResult.Failure" } ], "method": "GET", @@ -215,7 +215,7 @@ }, "fields": [ { - "jsonPath": "$.Summary.Completed tasks" + "jsonPath": "$.ExperimentResult.Completed tasks" } ], "method": "GET", @@ -285,8 +285,8 @@ "values": false }, "text": { - "titleSize": 30, - "valueSize": 30 + "titleSize": 20, + "valueSize": 20 }, "textMode": "auto" }, @@ -300,7 +300,7 @@ }, "fields": [ { - "jsonPath": "$.Summary" + "jsonPath": "$.ExperimentResult" } ], "method": "GET", @@ -315,7 +315,7 @@ "id": "extractFields", "options": { "replace": true, - "source": "Summary" + "source": "ExperimentResult" } }, { @@ -847,6 +847,6 @@ "timezone": "", "title": "HTTP Performance", "uid": "e8758667-b4e1-41c2-9bf4-446dc7c1fd27", - "version": 8, + "version": 4, "weekStart": "" } \ No newline at end of file diff --git a/metrics/server.go b/metrics/server.go index 89b2343e8..7281c975c 100644 --- a/metrics/server.go +++ b/metrics/server.go @@ -4,24 +4,31 @@ import ( "context" "encoding/json" "fmt" + "io" "math" "net/http" "reflect" "strconv" "time" + "github.com/bojand/ghz/runner" "github.com/iter8-tools/iter8/abn" + "github.com/iter8-tools/iter8/base" util "github.com/iter8-tools/iter8/base" "github.com/iter8-tools/iter8/base/log" "github.com/iter8-tools/iter8/controllers" "github.com/iter8-tools/iter8/storage" "github.com/montanaflynn/stats" "gonum.org/v1/plot/plotter" + + "fortio.org/fortio/fhttp" + fstats "fortio.org/fortio/stats" ) const ( configEnv = "METRICS_CONFIG_FILE" defaultPortNumber = 8080 + timeFormat = "02 Jan 06 15:04 MST" ) // metricsConfig defines the configuration of the controllers @@ -30,6 +37,100 @@ type metricsConfig struct { Port *int `json:"port,omitempty"` } +// versionSummarizedMetric adds version to summary data +type versionSummarizedMetric struct { + Version int + storage.SummarizedMetric +} + +// grafanaHistogram represents the histogram in the Grafana Iter8 dashboard +type grafanaHistogram []grafanaHistogramBucket + +// grafanaHistogramBucket represents a bucket in the histogram in the Grafana Iter8 dashboard +type grafanaHistogramBucket struct { + // Version is the version of the application + Version string + + // Bucket is the bucket of the histogram + // For example: 8-12 + Bucket string + + // Value is the number of points in this bucket + Value float64 +} + +// metricSummary is result for a metric +type metricSummary struct { + HistogramsOverTransactions *grafanaHistogram + HistogramsOverUsers *grafanaHistogram + SummaryOverTransactions []*versionSummarizedMetric + SummaryOverUsers []*versionSummarizedMetric +} + +// dashboardExperimentResult is a capitalized version of ExperimentResult used to display data in Grafana +type dashboardExperimentResult struct { + // Name is the name of this experiment + Name string + + // Namespace is the namespace of this experiment + Namespace string + + // Revision of this experiment + Revision int + + // StartTime is 
the time when the experiment run started
+	StartTime string `json:"Start time"`
+
+	// NumCompletedTasks is the number of completed tasks
+	NumCompletedTasks int `json:"Completed tasks"`
+
+	// Failure is true if any of its tasks failed
+	Failure bool
+
+	// Insights produced in this experiment
+	Insights *base.Insights
+
+	// Iter8Version is the version of Iter8 CLI that created this result object
+	Iter8Version string `json:"Iter8 version"`
+}
+
+// httpEndpointRow is the data needed to produce a single row for an HTTP experiment in the Iter8 Grafana dashboard
+type httpEndpointRow struct {
+	Durations  grafanaHistogram
+	Statistics storage.SummarizedMetric
+
+	ErrorDurations  grafanaHistogram         `json:"Error durations"`
+	ErrorStatistics storage.SummarizedMetric `json:"Error statistics"`
+
+	ReturnCodes map[int]int64 `json:"Return codes"`
+}
+
+type httpDashboard struct {
+	// key is the endpoint
+	Endpoints map[string]httpEndpointRow
+
+	ExperimentResult dashboardExperimentResult
+}
+
+type ghzStatistics struct {
+	Count      uint64
+	ErrorCount float64
+}
+
+// ghzEndpointRow is the data needed to produce a single row for a gRPC experiment in the Iter8 Grafana dashboard
+type ghzEndpointRow struct {
+	Durations              grafanaHistogram
+	Statistics             ghzStatistics
+	StatusCodeDistribution map[string]int `json:"Status codes"`
+}
+
+type ghzDashboard struct {
+	// key is the endpoint
+	Endpoints map[string]ghzEndpointRow
+
+	ExperimentResult dashboardExperimentResult
+}
+
 var allRoutemaps controllers.AllRouteMapsInterface = &controllers.DefaultRoutemaps{}
 
 // Start starts the HTTP server
@@ -47,7 +148,11 @@ func Start(stopCh <-chan struct{}) error {
 	}
 
 	// configure endpoints
-	http.HandleFunc("/metrics", getMetrics)
+	http.HandleFunc(util.MetricsPath, getMetrics)
+
+	http.HandleFunc(util.ExperimentResultPath, putExperimentResult)
+	http.HandleFunc(util.HTTPDashboardPath, getHTTPDashboard)
+	http.HandleFunc(util.GRPCDashboardPath, getGRPCDashboard)
 
 	// configure HTTP server
 	server := &http.Server{
@@ -73,36 +178,6 @@ func Start(stopCh <-chan struct{}) error {
 	return nil
 }
 
-// VersionSummarizedMetric adds version to summary data
-type VersionSummarizedMetric struct {
-	Version int
-	storage.SummarizedMetric
-}
-
-// GrafanaHistogram represents the histogram in the Grafana Iter8 dashboard
-type GrafanaHistogram []GrafanaHistogramBucket
-
-// GrafanaHistogramBucket represents a bucket in the histogram in the Grafana Iter8 dashboard
-type GrafanaHistogramBucket struct {
-	// Version is the version of the application
-	Version string
-
-	// Bucket is the bucket of the histogram
-	// For example: 8-12
-	Bucket string
-
-	// Value is the number of points in this bucket
-	Value float64
-}
-
-// MetricSummary is result for a metric
-type MetricSummary struct {
-	HistogramsOverTransactions *GrafanaHistogram
-	HistogramsOverUsers        *GrafanaHistogram
-	SummaryOverTransactions    []*VersionSummarizedMetric
-	SummaryOverUsers           []*VersionSummarizedMetric
-}
-
 // getMetrics handles GET /metrics with query parameter application=namespace/name
 func getMetrics(w http.ResponseWriter, r *http.Request) {
 	log.Logger.Trace("getMetrics called")
@@ -114,7 +189,7 @@ func getMetrics(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	// verify request (query parameter)
+	// verify request (query parameters)
 	application := r.URL.Query().Get("application")
 	if application == "" {
 		http.Error(w, "no application specified", http.StatusBadRequest)
 		return
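The removed line above fixes util.MetricsPath at /metrics, and getMetrics keys off a single application=namespace/name query parameter. A small, self-contained sketch of a conforming client call (the server URL mirrors the METRICS_SERVER_URL value used in the tests; it is an example, not a prescribed endpoint):

package main

import (
	"fmt"
	"net/http"
	"net/url"
)

// fetchMetrics exercises GET /metrics the way getMetrics expects:
// one application query parameter of the form namespace/name.
func fetchMetrics(serverURL, namespace, name string) (*http.Response, error) {
	u, err := url.Parse(serverURL + "/metrics")
	if err != nil {
		return nil, err
	}
	q := u.Query()
	q.Set("application", namespace+"/"+name)
	u.RawQuery = q.Encode()
	return http.Get(u.String())
}

func main() {
	resp, err := fetchMetrics("http://iter8.default:8080", "default", "my-app")
	if err != nil {
		fmt.Println(err)
		return
	}
	defer func() { _ = resp.Body.Close() }()
	fmt.Println(resp.Status)
}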
log.Logger.Tracef("getMetrics found routemap %v", rm) // initialize result - result := make(map[string]*MetricSummary, 0) + result := make(map[string]*metricSummary, 0) byMetricOverTransactions := make(map[string](map[string][]float64), 0) byMetricOverUsers := make(map[string](map[string][]float64), 0) @@ -146,6 +221,10 @@ func getMetrics(w http.ResponseWriter, r *http.Request) { continue } + if abn.MetricsClient == nil { + log.Logger.Error("no metrics client") + continue + } versionmetrics, err := abn.MetricsClient.GetMetrics(application, v, *signature) if err != nil { log.Logger.Debugf("no metrics found for application %s (version %d; signature %s)", application, v, *signature) @@ -156,11 +235,11 @@ func getMetrics(w http.ResponseWriter, r *http.Request) { _, ok := result[metric] if !ok { // no entry for metric result; create empty entry - result[metric] = &MetricSummary{ + result[metric] = &metricSummary{ HistogramsOverTransactions: nil, HistogramsOverUsers: nil, - SummaryOverTransactions: []*VersionSummarizedMetric{}, - SummaryOverUsers: []*VersionSummarizedMetric{}, + SummaryOverTransactions: []*versionSummarizedMetric{}, + SummaryOverUsers: []*versionSummarizedMetric{}, } } @@ -171,7 +250,7 @@ func getMetrics(w http.ResponseWriter, r *http.Request) { log.Logger.Debugf("unable to compute summaried metrics over transactions for application %s (version %d; signature %s)", application, v, *signature) continue } else { - entry.SummaryOverTransactions = append(entry.SummaryOverTransactions, &VersionSummarizedMetric{ + entry.SummaryOverTransactions = append(entry.SummaryOverTransactions, &versionSummarizedMetric{ Version: v, SummarizedMetric: smT, }) @@ -182,14 +261,13 @@ func getMetrics(w http.ResponseWriter, r *http.Request) { log.Logger.Debugf("unable to compute summaried metrics over users for application %s (version %d; signature %s)", application, v, *signature) continue } - entry.SummaryOverUsers = append(entry.SummaryOverUsers, &VersionSummarizedMetric{ + entry.SummaryOverUsers = append(entry.SummaryOverUsers, &versionSummarizedMetric{ Version: v, SummarizedMetric: smU, }) result[metric] = entry // copy data into structure for histogram calculation (to be done later) - // over transaction data vStr := fmt.Sprintf("%d", v) // over transaction data _, ok = byMetricOverTransactions[metric] @@ -290,7 +368,7 @@ func calculateSummarizedMetric(data []float64) (storage.SummarizedMetric, error) // For example: "-0.24178488465151116 - 0.24782423875427073" -> "-0.242 - 0.248" // // TODO: defaults for numBuckets/decimalPlace? 
@@ -290,7 +368,7 @@ func calculateSummarizedMetric(data []float64) (storage.SummarizedMetric, error)
 // For example: "-0.24178488465151116 - 0.24782423875427073" -> "-0.242 - 0.248"
 //
 // TODO: defaults for numBuckets/decimalPlace?
-func calculateHistogram(versionMetrics map[string][]float64, numBuckets int, decimalPlace float64) (GrafanaHistogram, error) {
+func calculateHistogram(versionMetrics map[string][]float64, numBuckets int, decimalPlace float64) (grafanaHistogram, error) {
 	if numBuckets == 0 {
 		numBuckets = 10
 	}
@@ -322,7 +400,7 @@ func calculateHistogram(versionMetrics map[string][]float64, numBuckets int, dec
 		return nil, fmt.Errorf("cannot create version maximum: %v", err)
 	}
 
-	grafanaHistogram := GrafanaHistogram{}
+	grafanaHistogram := grafanaHistogram{}
 
 	for version, metrics := range versionMetrics {
 		// convert the raw values to the gonum plot values
@@ -347,7 +425,7 @@ func calculateHistogram(versionMetrics map[string][]float64, numBuckets int, dec
 			count--
 		}
 
-		grafanaHistogram = append(grafanaHistogram, GrafanaHistogramBucket{
+		grafanaHistogram = append(grafanaHistogram, grafanaHistogramBucket{
 			Version: version,
 			Bucket:  bucketLabel(bin.Min, bin.Max, decimalPlace),
 			Value:   count,
@@ -370,3 +448,346 @@ func roundDecimal(x float64, decimalPlace float64) float64 {
 func bucketLabel(min, max float64, decimalPlace float64) string {
 	return fmt.Sprintf("%s - %s", strconv.FormatFloat(roundDecimal(min, decimalPlace), 'f', -1, 64), strconv.FormatFloat(roundDecimal(max, decimalPlace), 'f', -1, 64))
 }
+
+func getHTTPHistogram(fortioHistogram []fstats.Bucket, decimalPlace float64) grafanaHistogram {
+	grafanaHistogram := grafanaHistogram{}
+
+	for _, bucket := range fortioHistogram {
+		grafanaHistogram = append(grafanaHistogram, grafanaHistogramBucket{
+			Version: "0",
+			Bucket:  bucketLabel(bucket.Start*1000, bucket.End*1000, decimalPlace),
+			Value:   float64(bucket.Count),
+		})
+	}
+
+	return grafanaHistogram
+}
+
+func getHTTPStatistics(fortioHistogram *fstats.HistogramData, decimalPlace float64) storage.SummarizedMetric {
+	return storage.SummarizedMetric{
+		Count:  uint64(fortioHistogram.Count),
+		Mean:   fortioHistogram.Avg * 1000,
+		StdDev: fortioHistogram.StdDev * 1000,
+		Min:    fortioHistogram.Min * 1000,
+		Max:    fortioHistogram.Max * 1000,
+	}
+}
+
+func getHTTPEndpointRow(httpRunnerResults *fhttp.HTTPRunnerResults) httpEndpointRow {
+	row := httpEndpointRow{}
+	if httpRunnerResults.DurationHistogram != nil {
+		row.Durations = getHTTPHistogram(httpRunnerResults.DurationHistogram.Data, 1)
+		row.Statistics = getHTTPStatistics(httpRunnerResults.DurationHistogram, 1)
+	}
+
+	if httpRunnerResults.ErrorsDurationHistogram != nil {
+		row.ErrorDurations = getHTTPHistogram(httpRunnerResults.ErrorsDurationHistogram.Data, 1)
+		row.ErrorStatistics = getHTTPStatistics(httpRunnerResults.ErrorsDurationHistogram, 1)
+	}
+
+	row.ReturnCodes = httpRunnerResults.RetCodes
+
+	return row
+}
+
+func getHTTPDashboardHelper(experimentResult *base.ExperimentResult) httpDashboard {
+	dashboard := httpDashboard{
+		Endpoints: map[string]httpEndpointRow{},
+		ExperimentResult: dashboardExperimentResult{
+			Name:              experimentResult.Name,
+			Namespace:         experimentResult.Namespace,
+			Revision:          experimentResult.Revision,
+			StartTime:         experimentResult.StartTime.Time.Format(timeFormat),
+			NumCompletedTasks: experimentResult.NumCompletedTasks,
+			Failure:           experimentResult.Failure,
+			Iter8Version:      experimentResult.Iter8Version,
+		},
+	}
+
+	// get raw data from ExperimentResult
+	httpTaskData := experimentResult.Insights.TaskData[util.CollectHTTPTaskName]
+	if httpTaskData == nil {
+		log.Logger.Error("cannot get http task data from Insights")
+		return dashboard
+	}
+
+	httpTaskDataBytes, err := json.Marshal(httpTaskData)
+	if err != nil {
log.Logger.Error("cannot marshal http task data") + return dashboard + } + + httpResult := base.HTTPResult{} + err = json.Unmarshal(httpTaskDataBytes, &httpResult) + if err != nil { + log.Logger.Error("cannot unmarshal http task data into HTTPResult") + return dashboard + } + + // form rows of dashboard + for endpoint, endpointResult := range httpResult { + endpointResult := endpointResult + dashboard.Endpoints[endpoint] = getHTTPEndpointRow(endpointResult) + } + + return dashboard +} + +// getHTTPDashboard handles GET /getHTTPDashboard with query parameter application=namespace/name +func getHTTPDashboard(w http.ResponseWriter, r *http.Request) { + log.Logger.Trace("getHTTPGrafana called") + defer log.Logger.Trace("getHTTPGrafana completed") + + // verify method + if r.Method != http.MethodGet { + http.Error(w, "expected GET", http.StatusMethodNotAllowed) + return + } + + // verify request (query parameters) + namespace := r.URL.Query().Get("namespace") + if namespace == "" { + http.Error(w, "no namespace specified", http.StatusBadRequest) + return + } + + experiment := r.URL.Query().Get("experiment") + if experiment == "" { + http.Error(w, "no experiment specified", http.StatusBadRequest) + return + } + + log.Logger.Tracef("getHTTPGrafana called for namespace %s and experiment %s", namespace, experiment) + + // get fortioResult from metrics client + if abn.MetricsClient == nil { + http.Error(w, "no metrics client", http.StatusInternalServerError) + return + } + + // get experimentResult from metrics client + experimentResult, err := abn.MetricsClient.GetExperimentResult(namespace, experiment) + if err != nil { + errorMessage := fmt.Sprintf("cannot get experiment result with namespace %s, experiment %s", namespace, experiment) + log.Logger.Error(errorMessage) + http.Error(w, errorMessage, http.StatusBadRequest) + return + } + + // JSON marshal the dashboard + dashboardBytes, err := json.Marshal(getHTTPDashboardHelper(experimentResult)) + if err != nil { + errorMessage := "cannot JSON marshal HTTP dashboard" + log.Logger.Error(errorMessage) + http.Error(w, errorMessage, http.StatusInternalServerError) + return + } + + // finally, send response + w.Header().Add("Content-Type", "application/json") + _, _ = w.Write(dashboardBytes) +} + +func getGRPCHistogram(ghzHistogram []runner.Bucket, decimalPlace float64) grafanaHistogram { + grafanaHistogram := grafanaHistogram{} + + for _, bucket := range ghzHistogram { + grafanaHistogram = append(grafanaHistogram, grafanaHistogramBucket{ + Version: "0", + Bucket: fmt.Sprint(roundDecimal(bucket.Mark*1000, 3)), + Value: float64(bucket.Count), + }) + } + + return grafanaHistogram +} + +func getGRPCStatistics(ghzRunnerReport *runner.Report) ghzStatistics { + // populate error count & rate + ec := float64(0) + for _, count := range ghzRunnerReport.ErrorDist { + ec += float64(count) + } + + return ghzStatistics{ + Count: ghzRunnerReport.Count, + ErrorCount: ec, + } +} + +func getGRPCEndpointRow(ghzRunnerReport *runner.Report) ghzEndpointRow { + row := ghzEndpointRow{} + + if ghzRunnerReport.Histogram != nil { + row.Durations = getGRPCHistogram(ghzRunnerReport.Histogram, 3) + row.Statistics = getGRPCStatistics(ghzRunnerReport) + } + + row.StatusCodeDistribution = ghzRunnerReport.StatusCodeDist + + return row +} + +func getGRPCDashboardHelper(experimentResult *base.ExperimentResult) ghzDashboard { + dashboard := ghzDashboard{ + Endpoints: map[string]ghzEndpointRow{}, + ExperimentResult: dashboardExperimentResult{ + Name: experimentResult.Name, + Namespace: 
+			Namespace:         experimentResult.Namespace,
+			Revision:          experimentResult.Revision,
+			StartTime:         experimentResult.StartTime.Time.Format(timeFormat),
+			NumCompletedTasks: experimentResult.NumCompletedTasks,
+			Failure:           experimentResult.Failure,
+			Iter8Version:      experimentResult.Iter8Version,
+		},
+	}
+
+	// get raw data from ExperimentResult
+	ghzTaskData := experimentResult.Insights.TaskData[util.CollectGRPCTaskName]
+	if ghzTaskData == nil {
+		log.Logger.Error("cannot get ghz task data from Insights")
+		return dashboard
+	}
+
+	ghzTaskDataBytes, err := json.Marshal(ghzTaskData)
+	if err != nil {
+		log.Logger.Error("cannot marshal ghz task data")
+		return dashboard
+	}
+
+	ghzResult := base.GHZResult{}
+	err = json.Unmarshal(ghzTaskDataBytes, &ghzResult)
+	if err != nil {
+		log.Logger.Error("cannot unmarshal ghz task data into GHZResult")
+		return dashboard
+	}
+
+	// form rows of dashboard
+	for endpoint, endpointResult := range ghzResult {
+		endpointResult := endpointResult
+		dashboard.Endpoints[endpoint] = getGRPCEndpointRow(endpointResult)
+	}
+
+	return dashboard
+}
+
+func getGRPCDashboard(w http.ResponseWriter, r *http.Request) {
+	log.Logger.Trace("getGRPCDashboard called")
+	defer log.Logger.Trace("getGRPCDashboard completed")
+
+	// verify method
+	if r.Method != http.MethodGet {
+		http.Error(w, "expected GET", http.StatusMethodNotAllowed)
+		return
+	}
+
+	// verify request (query parameters)
+	namespace := r.URL.Query().Get("namespace")
+	if namespace == "" {
+		http.Error(w, "no namespace specified", http.StatusBadRequest)
+		return
+	}
+
+	experiment := r.URL.Query().Get("experiment")
+	if experiment == "" {
+		http.Error(w, "no experiment specified", http.StatusBadRequest)
+		return
+	}
+
+	log.Logger.Tracef("getGRPCDashboard called for namespace %s and experiment %s", namespace, experiment)
+
+	// ensure the metrics client is available
+	if abn.MetricsClient == nil {
+		http.Error(w, "no metrics client", http.StatusInternalServerError)
+		return
+	}
+
+	// get experimentResult from metrics client
+	experimentResult, err := abn.MetricsClient.GetExperimentResult(namespace, experiment)
+	if err != nil {
+		errorMessage := fmt.Sprintf("cannot get experiment result with namespace %s, experiment %s", namespace, experiment)
+		log.Logger.Error(errorMessage)
+		http.Error(w, errorMessage, http.StatusBadRequest)
+		return
+	}
+
+	// JSON marshal the dashboard
+	dashboardBytes, err := json.Marshal(getGRPCDashboardHelper(experimentResult))
+	if err != nil {
+		errorMessage := "cannot JSON marshal gRPC dashboard"
+		log.Logger.Error(errorMessage)
+		http.Error(w, errorMessage, http.StatusInternalServerError)
+		return
+	}
+
+	// finally, send response
+	w.Header().Add("Content-Type", "application/json")
+	_, _ = w.Write(dashboardBytes)
+}
+
+// putExperimentResult handles PUT /experimentResult with query parameters namespace and experiment
+func putExperimentResult(w http.ResponseWriter, r *http.Request) {
+	log.Logger.Trace("putExperimentResult called")
+	defer log.Logger.Trace("putExperimentResult completed")
+
+	// verify method
+	if r.Method != http.MethodPut {
+		http.Error(w, "expected PUT", http.StatusMethodNotAllowed)
+		return
+	}
+
+	// verify request (query parameters)
+	namespace := r.URL.Query().Get("namespace")
+	if namespace == "" {
+		http.Error(w, "no namespace specified", http.StatusBadRequest)
+		return
+	}
+
+	experiment := r.URL.Query().Get("experiment")
+	if experiment == "" {
+		http.Error(w, "no experiment specified", http.StatusBadRequest)
+		return
+	}
+
+	log.Logger.Tracef("putExperimentResult called for namespace %s and experiment %s", namespace, experiment)
+
+	defer func() {
+		err := r.Body.Close()
+		if err != nil {
+			errorMessage := fmt.Sprintf("cannot close request body: %v", err)
+			log.Logger.Error(errorMessage)
+			http.Error(w, errorMessage, http.StatusBadRequest)
+			return
+		}
+	}()
+	body, err := io.ReadAll(r.Body)
+	if err != nil {
+		errorMessage := fmt.Sprintf("cannot read request body: %v", err)
+		log.Logger.Error(errorMessage)
+		http.Error(w, errorMessage, http.StatusBadRequest)
+		return
+	}
+
+	experimentResult := util.ExperimentResult{}
+	err = json.Unmarshal(body, &experimentResult)
+	if err != nil {
+		errorMessage := fmt.Sprintf("cannot unmarshal body into ExperimentResult: %s: %v", string(body), err)
+		log.Logger.Error(errorMessage)
+		http.Error(w, errorMessage, http.StatusBadRequest)
+		return
+	}
+
+	if abn.MetricsClient == nil {
+		http.Error(w, "no metrics client", http.StatusInternalServerError)
+		return
+	}
+
+	err = abn.MetricsClient.SetExperimentResult(namespace, experiment, &experimentResult)
+	if err != nil {
+		errorMessage := fmt.Sprintf("cannot store result in storage client: %s: %v", string(body), err)
+		log.Logger.Error(errorMessage)
+		http.Error(w, errorMessage, http.StatusInternalServerError)
+		return
+	}
+
+	// TODO: 201 for new resource, 200 for update
+}
diff --git a/metrics/server_test.go b/metrics/server_test.go
index d0b040400..f818860c6 100644
--- a/metrics/server_test.go
+++ b/metrics/server_test.go
@@ -1,11 +1,14 @@
 package metrics
 
 import (
+	"bytes"
 	"context"
 	"encoding/json"
 	"fmt"
+	"io"
 	"net/http"
 	"net/http/httptest"
+	"net/url"
 	"os"
 	"regexp"
 	"sort"
@@ -21,6 +24,490 @@ import (
 	"github.com/stretchr/testify/assert"
 )
 
+const (
+	myName      = "my-name"
+	myNamespace = "my-namespace"
+)
+
+const fortioResultJSON = `{
+	"http://httpbin.default/get": {
+		"RunType": "HTTP",
+		"Labels": "",
+		"StartTime": "2023-07-21T14:00:40.134434969Z",
+		"RequestedQPS": "8",
+		"RequestedDuration": "exactly 100 calls",
+		"ActualQPS": 7.975606391552989,
+		"ActualDuration": 12538231589,
+		"NumThreads": 4,
+		"Version": "1.57.3",
+		"DurationHistogram": {
+			"Count": 100,
+			"Min": 0.004223875,
+			"Max": 0.040490042,
+			"Sum": 1.5977100850000001,
+			"Avg": 0.015977100850000002,
+			"StdDev": 0.008340658047253256,
+			"Data": [
+				{
+					"Start": 0.004223875,
+					"End": 0.005,
+					"Percent": 5,
+					"Count": 5
+				},
+				{
+					"Start": 0.005,
+					"End": 0.006,
+					"Percent": 10,
+					"Count": 5
+				},
+				{
+					"Start": 0.006,
+					"End": 0.007,
+					"Percent": 14,
+					"Count": 4
+				},
+				{
+					"Start": 0.007,
+					"End": 0.008,
+					"Percent": 19,
+					"Count": 5
+				},
+				{
+					"Start": 0.008,
+					"End": 0.009000000000000001,
+					"Percent": 24,
+					"Count": 5
+				},
+				{
+					"Start": 0.009000000000000001,
+					"End": 0.01,
+					"Percent": 28,
+					"Count": 4
+				},
+				{
+					"Start": 0.01,
+					"End": 0.011,
+					"Percent": 33,
+					"Count": 5
+				},
+				{
+					"Start": 0.011,
+					"End": 0.012,
+					"Percent": 36,
+					"Count": 3
+				},
+				{
+					"Start": 0.012,
+					"End": 0.014,
+					"Percent": 48,
+					"Count": 12
+				},
+				{
+					"Start": 0.014,
+					"End": 0.016,
+					"Percent": 55,
+					"Count": 7
+				},
+				{
+					"Start": 0.016,
+					"End": 0.018000000000000002,
+					"Percent": 65,
+					"Count": 10
+				},
+				{
+					"Start": 0.018000000000000002,
+					"End": 0.02,
+					"Percent": 74,
+					"Count": 9
+				},
+				{
+					"Start": 0.02,
+					"End": 0.025,
+					"Percent": 85,
+					"Count": 11
+				},
+				{
+					"Start": 0.025,
+					"End": 0.03,
+					"Percent": 93,
+					"Count": 8
+				},
+				{
+					"Start": 0.03,
+					"End": 0.035,
+					"Percent": 98,
+					"Count": 5
+				},
+				{
+					"Start": 0.035,
+					"End": 0.04,
+					"Percent": 99,
+					"Count": 1
+				},
+				{
+					"Start": 0.04,
+					"End": 0.040490042,
+					"Percent": 100,
+					"Count": 1
+				}
+			],
+			"Percentiles": [
+				{
+					"Percentile": 50,
+					"Value": 
0.014571428571428572 + }, + { + "Percentile": 75, + "Value": 0.020454545454545454 + }, + { + "Percentile": 90, + "Value": 0.028125 + }, + { + "Percentile": 95, + "Value": 0.032 + }, + { + "Percentile": 99, + "Value": 0.04 + }, + { + "Percentile": 99.9, + "Value": 0.0404410378 + } + ] + }, + "ErrorsDurationHistogram": { + "Count": 0, + "Min": 0, + "Max": 0, + "Sum": 0, + "Avg": 0, + "StdDev": 0, + "Data": null + }, + "Exactly": 100, + "Jitter": false, + "Uniform": false, + "NoCatchUp": false, + "RunID": 0, + "AccessLoggerInfo": "", + "ID": "2023-07-21-140040", + "RetCodes": { + "200": 100 + }, + "IPCountMap": { + "10.96.108.76:80": 4 + }, + "Insecure": false, + "MTLS": false, + "CACert": "", + "Cert": "", + "Key": "", + "UnixDomainSocket": "", + "URL": "http://httpbin.default/get", + "NumConnections": 1, + "Compression": false, + "DisableFastClient": false, + "HTTP10": false, + "H2": false, + "DisableKeepAlive": false, + "AllowHalfClose": false, + "FollowRedirects": false, + "Resolve": "", + "HTTPReqTimeOut": 3000000000, + "UserCredentials": "", + "ContentType": "", + "Payload": null, + "MethodOverride": "", + "LogErrors": false, + "SequentialWarmup": false, + "ConnReuseRange": [ + 0, + 0 + ], + "NoResolveEachConn": false, + "Offset": 0, + "Resolution": 0.001, + "Sizes": { + "Count": 100, + "Min": 413, + "Max": 413, + "Sum": 41300, + "Avg": 413, + "StdDev": 0, + "Data": [ + { + "Start": 413, + "End": 413, + "Percent": 100, + "Count": 100 + } + ] + }, + "HeaderSizes": { + "Count": 100, + "Min": 230, + "Max": 230, + "Sum": 23000, + "Avg": 230, + "StdDev": 0, + "Data": [ + { + "Start": 230, + "End": 230, + "Percent": 100, + "Count": 100 + } + ] + }, + "Sockets": [ + 1, + 1, + 1, + 1 + ], + "SocketCount": 4, + "ConnectionStats": { + "Count": 4, + "Min": 0.001385875, + "Max": 0.001724375, + "Sum": 0.006404583, + "Avg": 0.00160114575, + "StdDev": 0.00013101857565508474, + "Data": [ + { + "Start": 0.001385875, + "End": 0.001724375, + "Percent": 100, + "Count": 4 + } + ], + "Percentiles": [ + { + "Percentile": 50, + "Value": 0.0014987083333333332 + }, + { + "Percentile": 75, + "Value": 0.0016115416666666667 + }, + { + "Percentile": 90, + "Value": 0.0016792416666666667 + }, + { + "Percentile": 95, + "Value": 0.0017018083333333333 + }, + { + "Percentile": 99, + "Value": 0.0017198616666666668 + }, + { + "Percentile": 99.9, + "Value": 0.0017239236666666668 + } + ] + }, + "AbortOn": 0 + } +}` + +const fortioDashboardJSON = `{"Endpoints":{"http://httpbin.default/get":{"Durations":[{"Version":"0","Bucket":"4.2 - 5","Value":5},{"Version":"0","Bucket":"5 - 6","Value":5},{"Version":"0","Bucket":"6 - 7","Value":4},{"Version":"0","Bucket":"7 - 8","Value":5},{"Version":"0","Bucket":"8 - 9","Value":5},{"Version":"0","Bucket":"9 - 10","Value":4},{"Version":"0","Bucket":"10 - 11","Value":5},{"Version":"0","Bucket":"11 - 12","Value":3},{"Version":"0","Bucket":"12 - 14","Value":12},{"Version":"0","Bucket":"14 - 16","Value":7},{"Version":"0","Bucket":"16 - 18","Value":10},{"Version":"0","Bucket":"18 - 20","Value":9},{"Version":"0","Bucket":"20 - 25","Value":11},{"Version":"0","Bucket":"25 - 30","Value":8},{"Version":"0","Bucket":"30 - 35","Value":5},{"Version":"0","Bucket":"35 - 40","Value":1},{"Version":"0","Bucket":"40 - 40.4","Value":1}],"Statistics":{"Count":100,"Mean":15.977100850000001,"StdDev":8.340658047253257,"Min":4.2238750000000005,"Max":40.490041999999995},"Error durations":[],"Error statistics":{"Count":0,"Mean":0,"StdDev":0,"Min":0,"Max":0},"Return 
codes":{"200":100}}},"ExperimentResult":{"Name":"my-name","Namespace":"my-namespace","Revision":0,"Start time":"01 Jan 01 00:00 UTC","Completed tasks":5,"Failure":false,"Insights":null,"Iter8 version":""}}` + +const ghzResultJSON = `{ + "routeguide.RouteGuide.GetFeature": { + "date": "2023-07-17T12:23:56Z", + "endReason": "normal", + "options": { + "call": "routeguide.RouteGuide.GetFeature", + "host": "routeguide.default:50051", + "proto": "/tmp/ghz.proto", + "import-paths": [ + "/tmp", + "." + ], + "insecure": true, + "load-schedule": "const", + "load-start": 0, + "load-end": 0, + "load-step": 0, + "load-step-duration": 0, + "load-max-duration": 0, + "concurrency": 50, + "concurrency-schedule": "const", + "concurrency-start": 1, + "concurrency-end": 0, + "concurrency-step": 0, + "concurrency-step-duration": 0, + "concurrency-max-duration": 0, + "total": 200, + "connections": 1, + "dial-timeout": 10000000000, + "data": { + "latitude": 407838351, + "longitude": -746143763 + }, + "binary": false, + "CPUs": 5, + "count-errors": true + }, + "count": 200, + "total": 592907667, + "average": 25208185, + "fastest": 32375, + "slowest": 195740917, + "rps": 337.3206506368217, + "errorDistribution": { + "rpc error: code = Unavailable desc = connection error: desc = \"transport: Error while dialing: dial tcp 10.96.20.53:50051: connect: connection refused\"": 200 + }, + "statusCodeDistribution": { + "Unavailable": 200 + }, + "latencyDistribution": [ + { + "percentage": 10, + "latency": 35584 + }, + { + "percentage": 25, + "latency": 39958 + }, + { + "percentage": 50, + "latency": 86208 + }, + { + "percentage": 75, + "latency": 12777625 + }, + { + "percentage": 90, + "latency": 106714334 + }, + { + "percentage": 95, + "latency": 189847000 + }, + { + "percentage": 99, + "latency": 195400792 + } + ], + "histogram": [ + { + "mark": 0.000032375, + "count": 1, + "frequency": 0.005 + }, + { + "mark": 0.0196032292, + "count": 167, + "frequency": 0.835 + }, + { + "mark": 0.0391740834, + "count": 0, + "frequency": 0 + }, + { + "mark": 0.05874493759999999, + "count": 0, + "frequency": 0 + }, + { + "mark": 0.07831579179999999, + "count": 0, + "frequency": 0 + }, + { + "mark": 0.097886646, + "count": 3, + "frequency": 0.015 + }, + { + "mark": 0.11745750019999998, + "count": 13, + "frequency": 0.065 + }, + { + "mark": 0.1370283544, + "count": 0, + "frequency": 0 + }, + { + "mark": 0.15659920859999998, + "count": 0, + "frequency": 0 + }, + { + "mark": 0.17617006279999997, + "count": 0, + "frequency": 0 + }, + { + "mark": 0.195740917, + "count": 16, + "frequency": 0.08 + } + ], + "details": [ + { + "timestamp": "2023-07-17T12:23:56.089998719Z", + "latency": 14490041, + "error": "rpc error: code = Unavailable desc = connection error: desc = \"transport: Error while dialing: dial tcp 10.96.20.53:50051: connect: connection refused\"", + "status": "Unavailable" + }, + { + "timestamp": "2023-07-17T12:23:56.090471886Z", + "latency": 13759125, + "error": "rpc error: code = Unavailable desc = connection error: desc = \"transport: Error while dialing: dial tcp 10.96.20.53:50051: connect: connection refused\"", + "status": "Unavailable" + }, + { + "timestamp": "2023-07-17T12:23:56.090528678Z", + "latency": 194468542, + "error": "rpc error: code = Unavailable desc = connection error: desc = \"transport: Error while dialing: dial tcp 10.96.20.53:50051: connect: connection refused\"", + "status": "Unavailable" + }, + { + "timestamp": "2023-07-17T12:23:56.090079886Z", + "latency": 105031291, + "error": "rpc error: code = Unavailable 
desc = connection error: desc = \"transport: Error while dialing: dial tcp 10.96.20.53:50051: connect: connection refused\"", + "status": "Unavailable" + }, + { + "timestamp": "2023-07-17T12:23:56.090224928Z", + "latency": 100337083, + "error": "rpc error: code = Unavailable desc = connection error: desc = \"transport: Error while dialing: dial tcp 10.96.20.53:50051: connect: connection refused\"", + "status": "Unavailable" + }, + { + "timestamp": "2023-07-17T12:23:56.091097053Z", + "latency": 12463750, + "error": "rpc error: code = Unavailable desc = connection error: desc = \"transport: Error while dialing: dial tcp 10.96.20.53:50051: connect: connection refused\"", + "status": "Unavailable" + }, + { + "timestamp": "2023-07-17T12:23:56.091135844Z", + "latency": 12603875, + "error": "rpc error: code = Unavailable desc = connection error: desc = \"transport: Error while dialing: dial tcp 10.96.20.53:50051: connect: connection refused\"", + "status": "Unavailable" + }, + { + "timestamp": "2023-07-17T12:23:56.478469636Z", + "latency": 86208, + "error": "rpc error: code = Unavailable desc = connection error: desc = \"transport: Error while dialing: dial tcp 10.96.20.53:50051: connect: connection refused\"", + "status": "Unavailable" + } + ] + } +}` + +const ghzDashboardJSON = `{"Endpoints":{"routeguide.RouteGuide.GetFeature":{"Durations":[{"Version":"0","Bucket":"0.032","Value":1},{"Version":"0","Bucket":"19.603","Value":167},{"Version":"0","Bucket":"39.174","Value":0},{"Version":"0","Bucket":"58.744","Value":0},{"Version":"0","Bucket":"78.315","Value":0},{"Version":"0","Bucket":"97.886","Value":3},{"Version":"0","Bucket":"117.457","Value":13},{"Version":"0","Bucket":"137.028","Value":0},{"Version":"0","Bucket":"156.599","Value":0},{"Version":"0","Bucket":"176.17","Value":0},{"Version":"0","Bucket":"195.74","Value":16}],"Statistics":{"Count":200,"ErrorCount":200},"Status codes":{"Unavailable":200}}},"ExperimentResult":{"Name":"my-name","Namespace":"my-namespace","Revision":0,"Start time":"01 Jan 01 00:00 UTC","Completed tasks":5,"Failure":false,"Insights":null,"Iter8 version":""}}` + func TestStart(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) defer cancel() @@ -334,5 +821,398 @@ func getTestRM(namespace, name string) *testroutemap { }, normalizedWeights: []uint32{1, 1}, } +} + +func TestTestRM(t *testing.T) { + namespace := "default" + name := "test" + rm := getTestRM(namespace, name) + + assert.Equal(t, namespace, rm.GetNamespace()) + assert.Equal(t, name, rm.GetName()) + assert.Equal(t, []uint32{1, 1}, rm.Weights()) +} + +func TestGetHTTPDashboardHelper(t *testing.T) { + fortioResult := util.HTTPResult{} + err := json.Unmarshal([]byte(fortioResultJSON), &fortioResult) + assert.NoError(t, err) + + experimentResult := util.ExperimentResult{ + Name: myName, + Namespace: myNamespace, + NumCompletedTasks: 5, + Insights: &util.Insights{ + TaskData: map[string]interface{}{ + util.CollectHTTPTaskName: fortioResult, + }, + }, + } + + dashboard := getHTTPDashboardHelper(&experimentResult) + assert.NotNil(t, dashboard) + dashboardBytes, err := json.Marshal(dashboard) + assert.NoError(t, err) + + assert.Equal( + t, + fortioDashboardJSON, + string(dashboardBytes), + ) +} + +func TestGetGRPCDashboardHelper(t *testing.T) { + ghzResult := util.GHZResult{} + err := json.Unmarshal([]byte(ghzResultJSON), &ghzResult) + assert.NoError(t, err) + + experimentResult := util.ExperimentResult{ + Name: myName, + Namespace: myNamespace, + NumCompletedTasks: 5, + Insights: 
&util.Insights{ + TaskData: map[string]interface{}{ + util.CollectGRPCTaskName: ghzResult, + }, + }, + } + + dashboard := getGRPCDashboardHelper(&experimentResult) + + assert.NotNil(t, dashboard) + dashboardBytes, err := json.Marshal(dashboard) + assert.NoError(t, err) + assert.Equal( + t, + ghzDashboardJSON, + string(dashboardBytes), + ) +} + +func TestPutExperimentResultInvalidMethod(t *testing.T) { + w := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, util.ExperimentResultPath, nil) + putExperimentResult(w, req) + res := w.Result() + defer func() { + err := res.Body.Close() + assert.NoError(t, err) + }() + assert.Equal(t, http.StatusMethodNotAllowed, res.StatusCode) +} + +func TestPutExperimentResultMissingParameter(t *testing.T) { + tests := []struct { + queryParams url.Values + expectedStatusCode int + }{ + { + expectedStatusCode: http.StatusBadRequest, + }, + { + queryParams: url.Values{ + "namespace": {"default"}, + }, + expectedStatusCode: http.StatusBadRequest, + }, + { + queryParams: url.Values{ + "experiment": {"default"}, + }, + expectedStatusCode: http.StatusBadRequest, + }, + } + + for _, test := range tests { + w := httptest.NewRecorder() + + u, err := url.ParseRequestURI(util.ExperimentResultPath) + assert.NoError(t, err) + u.RawQuery = test.queryParams.Encode() + urlStr := fmt.Sprintf("%v", u) + + req := httptest.NewRequest(http.MethodPut, urlStr, nil) + + putExperimentResult(w, req) + res := w.Result() + defer func() { + err := res.Body.Close() + assert.NoError(t, err) + }() + + assert.Equal(t, test.expectedStatusCode, res.StatusCode) + } +} + +func TestPutExperimentResult(t *testing.T) { + // instantiate metrics client + tempDirPath := t.TempDir() + client, err := badgerdb.GetClient(badger.DefaultOptions(tempDirPath), badgerdb.AdditionalOptions{}) + assert.NoError(t, err) + abn.MetricsClient = client + + w := httptest.NewRecorder() + + // construct inputs to putExperimentResult + u, err := url.ParseRequestURI(util.ExperimentResultPath) + assert.NoError(t, err) + params := url.Values{ + "namespace": {"default"}, + "experiment": {"default"}, + } + u.RawQuery = params.Encode() + urlStr := fmt.Sprintf("%v", u) + + experimentResult := util.ExperimentResult{ + Name: myName, + Namespace: myNamespace, + NumCompletedTasks: 5, + Insights: &util.Insights{ + TaskData: map[string]interface{}{ + util.CollectHTTPTaskName: "hello world", + }, + }, + } + + experimentResultBytes, err := json.Marshal(experimentResult) + assert.NoError(t, err) + + req := httptest.NewRequest(http.MethodPut, urlStr, bytes.NewBuffer(experimentResultBytes)) + // put result into the metrics client + putExperimentResult(w, req) + res := w.Result() + defer func() { + err := res.Body.Close() + assert.NoError(t, err) + }() + + // check to see if the result is stored in the metrics client + result, err := abn.MetricsClient.GetExperimentResult("default", "default") + assert.NoError(t, err) + assert.Equal(t, &experimentResult, result) +} + +func TestGetHTTPDashboardInvalidMethod(t *testing.T) { + w := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, util.HTTPDashboardPath, nil) + getHTTPDashboard(w, req) + res := w.Result() + defer func() { + err := res.Body.Close() + assert.NoError(t, err) + }() + assert.Equal(t, http.StatusMethodNotAllowed, res.StatusCode) +} + +func TestGetHTTPDashboardMissingParameter(t *testing.T) { + tests := []struct { + queryParams url.Values + expectedStatusCode int + }{ + { + expectedStatusCode: http.StatusBadRequest, + }, + { + queryParams: 
url.Values{ + "namespace": {"default"}, + }, + expectedStatusCode: http.StatusBadRequest, + }, + { + queryParams: url.Values{ + "experiment": {"default"}, + }, + expectedStatusCode: http.StatusBadRequest, + }, + } + + for _, test := range tests { + w := httptest.NewRecorder() + + u, err := url.ParseRequestURI(util.HTTPDashboardPath) + assert.NoError(t, err) + u.RawQuery = test.queryParams.Encode() + urlStr := fmt.Sprintf("%v", u) + req := httptest.NewRequest(http.MethodGet, urlStr, nil) + + getHTTPDashboard(w, req) + res := w.Result() + defer func() { + err := res.Body.Close() + assert.NoError(t, err) + }() + + assert.Equal(t, test.expectedStatusCode, res.StatusCode) + } +} + +func TestGetHTTPDashboard(t *testing.T) { + // instantiate metrics client + tempDirPath := t.TempDir() + client, err := badgerdb.GetClient(badger.DefaultOptions(tempDirPath), badgerdb.AdditionalOptions{}) + assert.NoError(t, err) + abn.MetricsClient = client + + // preload metric client with experiment result + fortioResult := util.HTTPResult{} + err = json.Unmarshal([]byte(fortioResultJSON), &fortioResult) + assert.NoError(t, err) + + experimentResult := util.ExperimentResult{ + Name: myName, + Namespace: myNamespace, + NumCompletedTasks: 5, + Insights: &util.Insights{ + TaskData: map[string]interface{}{ + util.CollectHTTPTaskName: fortioResult, + }, + }, + } + + err = abn.MetricsClient.SetExperimentResult("default", "default", &experimentResult) + assert.NoError(t, err) + + w := httptest.NewRecorder() + + // construct inputs to getHTTPDashboard + u, err := url.ParseRequestURI(util.HTTPDashboardPath) + assert.NoError(t, err) + params := url.Values{ + "namespace": {"default"}, + "experiment": {"default"}, + } + u.RawQuery = params.Encode() + urlStr := fmt.Sprintf("%v", u) + + req := httptest.NewRequest(http.MethodGet, urlStr, nil) + + // get HTTP dashboard based on result in metrics client + getHTTPDashboard(w, req) + res := w.Result() + defer func() { + err := res.Body.Close() + assert.NoError(t, err) + }() + + // check the HTTP dashboard + body, err := io.ReadAll(res.Body) + assert.NoError(t, err) + assert.Equal( + t, + fortioDashboardJSON, + string(body), + ) +} + +func TestGetGRPCDashboardInvalidMethod(t *testing.T) { + w := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, util.GRPCDashboardPath, nil) + getGRPCDashboard(w, req) + res := w.Result() + defer func() { + err := res.Body.Close() + assert.NoError(t, err) + }() + assert.Equal(t, http.StatusMethodNotAllowed, res.StatusCode) +} + +func TestGetGRPCDashboardMissingParameter(t *testing.T) { + tests := []struct { + queryParams url.Values + expectedStatusCode int + }{ + { + expectedStatusCode: http.StatusBadRequest, + }, + { + queryParams: url.Values{ + "namespace": {"default"}, + }, + expectedStatusCode: http.StatusBadRequest, + }, + { + queryParams: url.Values{ + "experiment": {"default"}, + }, + expectedStatusCode: http.StatusBadRequest, + }, + } + + for _, test := range tests { + w := httptest.NewRecorder() + + u, err := url.ParseRequestURI(util.GRPCDashboardPath) + assert.NoError(t, err) + u.RawQuery = test.queryParams.Encode() + urlStr := fmt.Sprintf("%v", u) + + req := httptest.NewRequest(http.MethodGet, urlStr, nil) + + getGRPCDashboard(w, req) + res := w.Result() + defer func() { + err := res.Body.Close() + assert.NoError(t, err) + }() + + assert.Equal(t, test.expectedStatusCode, res.StatusCode) + } +} + +func TestGetGRPCDashboard(t *testing.T) { + // instantiate metrics client + tempDirPath := t.TempDir() + client, err := 
badgerdb.GetClient(badger.DefaultOptions(tempDirPath), badgerdb.AdditionalOptions{}) + assert.NoError(t, err) + abn.MetricsClient = client + + // preload metrics client with experiment result + ghzResult := util.GHZResult{} + err = json.Unmarshal([]byte(ghzResultJSON), &ghzResult) + assert.NoError(t, err) + + experimentResult := util.ExperimentResult{ + Name: myName, + Namespace: myNamespace, + NumCompletedTasks: 5, + Insights: &util.Insights{ + TaskData: map[string]interface{}{ + util.CollectGRPCTaskName: ghzResult, + }, + }, + } + + err = abn.MetricsClient.SetExperimentResult("default", "default", &experimentResult) + assert.NoError(t, err) + w := httptest.NewRecorder() + + // construct inputs to getGRPCDashboard + u, err := url.ParseRequestURI(util.GRPCDashboardPath) + assert.NoError(t, err) + params := url.Values{ + "namespace": {"default"}, + "experiment": {"default"}, + } + u.RawQuery = params.Encode() + urlStr := fmt.Sprintf("%v", u) + + req := httptest.NewRequest(http.MethodGet, urlStr, nil) + + // get ghz dashboard based on result in metrics client + getGRPCDashboard(w, req) + res := w.Result() + defer func() { + err := res.Body.Close() + assert.NoError(t, err) + }() + + // check the ghz dashboard + body, err := io.ReadAll(res.Body) + assert.NoError(t, err) + assert.Equal( + t, + ghzDashboardJSON, + string(body), + ) } diff --git a/storage/badgerdb/simple.go b/storage/badgerdb/simple.go index aeb550af4..d88223ba2 100644 --- a/storage/badgerdb/simple.go +++ b/storage/badgerdb/simple.go @@ -2,6 +2,7 @@ package badgerdb import ( + "encoding/json" "errors" "fmt" "os" @@ -11,6 +12,7 @@ import ( "github.com/dgraph-io/badger/v4" "github.com/imdario/mergo" + "github.com/iter8-tools/iter8/base" "github.com/iter8-tools/iter8/storage" ) @@ -312,3 +314,55 @@ func (cl Client) GetMetrics(applicationName string, version int, signature strin return &metrics, nil } + +func getExperimentResultKey(namespace, experiment string) string { + // getExperimentResultKey returns the storage key for the experiment result of a given namespace and experiment + return fmt.Sprintf("kt-result::%s::%s", namespace, experiment) +} + +// SetExperimentResult sets the experiment result for a particular namespace and experiment name +// the data is JSON marshaled into []byte before it is written to storage +func (cl Client) SetExperimentResult(namespace, experiment string, data *base.ExperimentResult) error { + dataBytes, err := json.Marshal(data) + if err != nil { + return fmt.Errorf("cannot JSON marshal ExperimentResult: %w", err) + } + + key := getExperimentResultKey(namespace, experiment) + return cl.db.Update(func(txn *badger.Txn) error { + e := badger.NewEntry([]byte(key), dataBytes).WithTTL(cl.additionalOptions.TTL) + err := txn.SetEntry(e) + return err + }) +} + +// GetExperimentResult gets the experiment result for a particular namespace and experiment name +// the stored []byte data is JSON unmarshaled into a base.ExperimentResult +func (cl Client) GetExperimentResult(namespace, experiment string) (*base.ExperimentResult, error) { + var valCopy []byte + err := cl.db.View(func(txn *badger.Txn) error { + item, err := txn.Get([]byte(getExperimentResultKey(namespace, experiment))) + if err != nil { + return fmt.Errorf("cannot get ExperimentResult with name: \"%s\" and namespace: %s: %w", experiment, namespace, err) + } + + valCopy, err = item.ValueCopy(nil) + if err != nil { + return fmt.Errorf("cannot copy value of ExperimentResult with name: \"%s\" and namespace: %s: %w", experiment, namespace, err) + } + + return nil + }) 
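+ // the read transaction has completed at this point; valCopy (returned by ValueCopy) remains valid outside the transaction and can safely be unmarshaled below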
+ if err != nil { + return nil, err + } + + experimentResult := base.ExperimentResult{} + err = json.Unmarshal(valCopy, &experimentResult) + if err != nil { + return nil, fmt.Errorf("cannot JSON unmarshal ExperimentResult: \"%s\": %w", string(valCopy), err) + } + + return &experimentResult, nil +} diff --git a/storage/badgerdb/simple_test.go b/storage/badgerdb/simple_test.go index 1afdf47a0..8b737ecdf 100644 --- a/storage/badgerdb/simple_test.go +++ b/storage/badgerdb/simple_test.go @@ -7,6 +7,7 @@ import ( "testing" "github.com/dgraph-io/badger/v4" + "github.com/iter8-tools/iter8/base" "github.com/stretchr/testify/assert" ) @@ -272,3 +273,25 @@ func TestGetMetrics(t *testing.T) { assert.NoError(t, err) assert.Equal(t, "{}", string(jsonMetrics)) } + +func TestGetExperimentResult(t *testing.T) { + tempDirPath := t.TempDir() + + client, err := GetClient(badger.DefaultOptions(tempDirPath), AdditionalOptions{}) + assert.NoError(t, err) + + namespace := "my-namespace" + experiment := "my-experiment" + + experimentResult := base.ExperimentResult{ + Name: experiment, + Namespace: namespace, + } + + err = client.SetExperimentResult(namespace, experiment, &experimentResult) + assert.NoError(t, err) + + result, err := client.GetExperimentResult(namespace, experiment) + assert.NoError(t, err) + assert.Equal(t, &experimentResult, result) +} diff --git a/storage/interface.go b/storage/interface.go index 0f44b6471..e75669196 100644 --- a/storage/interface.go +++ b/storage/interface.go @@ -1,6 +1,8 @@ // Package storage provides the storage client for the controllers package package storage +import "github.com/iter8-tools/iter8/base" + // SummarizedMetric is a metric summary type SummarizedMetric struct { Count uint64 @@ -37,7 +39,7 @@ type VersionMetrics map[string]struct { // Interface enables interaction with a storage entity // Can be mocked in unit tests with fake implementation type Interface interface { - // Returns a nested map of the metrics data for a particular application, version, and signature + // returns a nested map of the metrics data for a particular application, version, and signature // Example: // { // "my-metric": { @@ -63,4 +65,11 @@ type Interface interface { // Example key: kt-users::my-app::0::my-signature::my-user -> true SetUser(applicationName string, version int, signature, user string) error + + // get ExperimentResult for a particular namespace and experiment + GetExperimentResult(namespace, experiment string) (*base.ExperimentResult, error) + + // set ExperimentResult for a particular namespace and experiment + // Example key: kt-result::my-namespace::my-experiment -> the JSON-marshaled ExperimentResult + SetExperimentResult(namespace, experiment string, data *base.ExperimentResult) error } diff --git a/templates/custommetrics/istio-prom.tpl b/templates/custommetrics/istio-prom.tpl deleted file mode 100644 index 172af432b..000000000 --- a/templates/custommetrics/istio-prom.tpl +++ /dev/null @@ -1,114 +0,0 @@ -# This file provides templated metric specifications that enable -# Iter8 to retrieve metrics from Istio's Prometheus add-on. -# -# For a list of metrics supported out-of-the-box by the Istio Prometheus add-on, -# please see https://istio.io/latest/docs/reference/config/metrics/ -# -# Iter8 substitutes the placeholders in this file with values, -# and uses the resulting metric specs to query Prometheus. -# The placeholders are as follows. 
-# -# labels map[string]interface{} optional -# elapsedTimeSeconds int implicit -# startingTime string optional -# latencyPercentiles []int optional -# -# labels: this is the set of Prometheus labels that will be used to identify a particular -# app version. These labels will be applied to every Prometheus query. To learn more -# about what labels you can use for Prometheus, please see -# https://istio.io/latest/docs/reference/config/metrics/#labels -# -# elapsedTimeSeconds: this should not be specified directly by the user. -# It is implicitly computed by Iter8 according to the following formula -# elapsedTimeSeconds := (time.Now() - startingTime).Seconds() -# -# startingTime: By default, this is the time at which the Iter8 experiment started. -# The user can explicitly specify the startingTime for each app version -# (for example, the user can set the startingTime to the creation time of the app version) -# -# latencyPercentiles: Each item in this slice will create a new metric spec. -# For example, if this is set to [50,75,90,95], -# then, latency-p50, latency-p75, latency-p90, latency-p95 metric specs are created. - -{{- define "labels"}} -{{- range $key, $val := .labels }} -{{- if or (eq (kindOf $val) "slice") (eq (kindOf $val) "map")}} -{{- fail (printf "labels should be a primitive types but received: %s :%s" $key $val) }} -{{- end }} -{{- if eq $key "response_code"}} -{{- fail "labels should not contain 'response_code'" }} -{{- end }} - {{ $key }}="{{ $val }}", -{{- end }} -{{- end}} - -# url is the HTTP endpoint where the Prometheus service installed by Istio's Prom add-on -# can be queried for metrics - -url: {{ .istioPromURL | default "http://prometheus.istio-system:9090/api/v1/query" }} -provider: istio-prom -method: GET -metrics: -- name: request-count - type: counter - description: | - Number of requests - params: - - name: query - value: | - sum(last_over_time(istio_requests_total{ - {{- template "labels" . }} - }[{{ .elapsedTimeSeconds }}s])) or on() vector(0) - jqExpression: .data.result[0].value[1] | tonumber -- name: error-count - type: counter - description: | - Number of unsuccessful requests - params: - - name: query - value: | - sum(last_over_time(istio_requests_total{ - response_code=~'5..', - {{- template "labels" . }} - }[{{ .elapsedTimeSeconds }}s])) or on() vector(0) - jqExpression: .data.result[0].value[1] | tonumber -- name: error-rate - type: gauge - description: | - Fraction of unsuccessful requests - params: - - name: query - value: | - (sum(last_over_time(istio_requests_total{ - response_code=~'5..', - {{- template "labels" . }} - }[{{ .elapsedTimeSeconds }}s])) or on() vector(0))/(sum(last_over_time(istio_requests_total{ - {{- template "labels" . }} - }[{{ .elapsedTimeSeconds }}s])) or on() vector(0)) - jqExpression: .data.result.[0].value.[1] | tonumber -- name: latency-mean - type: gauge - description: | - Mean latency - params: - - name: query - value: | - (sum(last_over_time(istio_request_duration_milliseconds_sum{ - {{- template "labels" . }} - }[{{ .elapsedTimeSeconds }}s])) or on() vector(0))/(sum(last_over_time(istio_requests_total{ - {{- template "labels" . 
}} - }[{{ .elapsedTimeSeconds }}s])) or on() vector(0)) - jqExpression: .data.result[0].value[1] | tonumber -{{- range $i, $p := .latencyPercentiles }} -- name: latency-p{{ $p }} - type: gauge - description: | - {{ $p }} percentile latency - params: - - name: query - value: | - histogram_quantile(0.{{ $p }}, sum(rate(istio_request_duration_milliseconds_bucket{ - {{- template "labels" $ }} - }[{{ $.elapsedTimeSeconds }}s])) by (le)) - jqExpression: .data.result[0].value[1] | tonumber -{{- end }} diff --git a/testdata/assertinputs/experiment.yaml b/testdata/assertinputs/experiment.yaml index b0f9c0009..5dc11377a 100644 --- a/testdata/assertinputs/experiment.yaml +++ b/testdata/assertinputs/experiment.yaml @@ -1,3 +1,6 @@ +metadata: + name: myName + namespace: myNamespace spec: # task 1: generate HTTP requests for application URL # collect Iter8's built-in HTTP latency and error-related metrics @@ -7,171 +10,10 @@ spec: errorRanges: - lower: 500 url: https://httpbin.org/get - # task 2: validate service level objectives for app using - # the metrics collected in the above task - - task: assess - with: - rewards: - max: - - "http/latency-mean" - - "http/latency-p50" - min: - - "http/error-rate" - SLOs: - Upper: - - metric: "http/error-rate" - limit: 0 - - metric: "http/latency-mean" - limit: 500 - - metric: "http/latency-p50" - limit: 1000 - - metric: "http/latency-p50.0" - limit: 1000 - - metric: "http/latency-p95.0" - limit: 2500 - - metric: "http/latency-p99" - limit: 5000 - # tasks 3 & 4: print if SLOs are satisfied or not - - if: SLOs() - run: echo "SLOs satisfied" - - if: not SLOs() - run: echo "SLOs not satisfied" result: failure: false insights: - rewards: - max: - - "http/latency-mean" - - "http/latency-p50" - min: - - "http/error-rate" - rewardsWinners: - max: - - 0 - - 0 - min: - - 0 - SLOs: - upper: - - metric: "http/error-rate" - limit: 0 - - metric: "http/latency-mean" - limit: 500 - - metric: "http/latency-p50" - limit: 1000 - - metric: "http/latency-p50.0" - limit: 1000 - - metric: "http/latency-p95.0" - limit: 2500 - - metric: "http/latency-p99" - limit: 5000 - SLOsSatisfied: - upper: - - - true - - - true - - - true - - - true - - - true - - - true - histMetricValues: - - http/latency: - - count: 3 - lower: 11.388804 - upper: 12 - - count: 7 - lower: 12 - upper: 14 - - count: 4 - lower: 14 - upper: 16 - - count: 1 - lower: 16 - upper: 18.000000000000004 - - count: 1 - lower: 250 - upper: 272.838867 - metricsInfo: - http/error-count: - description: number of responses that were errors - type: Counter - http/error-rate: - description: fraction of responses that were errors - type: Gauge - http/latency: - description: Latency Histogram - type: Histogram - units: msec - http/latency-max: - description: maximum of observed latency values - type: Gauge - units: msec - http/latency-mean: - description: mean of observed latency values - type: Gauge - units: msec - http/latency-min: - description: minimum of observed latency values - type: Gauge - units: msec - http/latency-p50: - description: 50-th percentile of observed latency values - type: Gauge - units: msec - http/latency-p75: - description: 75-th percentile of observed latency values - type: Gauge - units: msec - http/latency-p90: - description: 90-th percentile of observed latency values - type: Gauge - units: msec - http/latency-p95: - description: 95-th percentile of observed latency values - type: Gauge - units: msec - http/latency-p99: - description: 99-th percentile of observed latency values - type: Gauge - units: msec - 
http/latency-p99.9: - description: 99.9-th percentile of observed latency values - type: Gauge - units: msec - http/latency-stddev: - description: standard deviation of observed latency values - type: Gauge - units: msec - http/request-count: - description: number of requests sent - type: Counter - nonHistMetricValues: - - http/error-count: - - 0 - http/error-rate: - - 0 - http/latency-max: - - 272.838867 - http/latency-mean: - - 29.624432499999998 - http/latency-min: - - 11.388804 - http/latency-p50: - - 13.428571428571429 - http/latency-p75: - - 15 - http/latency-p90: - - 16.8 - http/latency-p95: - - 254.56777339999996 - http/latency-p99: - - 269.18464828 - http/latency-p99.9: - - 272.473445128 - http/latency-stddev: - - 62.81583554772398 - http/request-count: - - 16 numVersions: 1 iter8Version: v0.13 - numCompletedTasks: 4 - startTime: "2022-03-16T10:22:58.540897-04:00" + numCompletedTasks: 1 + startTime: "2022-03-16T10:22:58.540897-04:00" \ No newline at end of file diff --git a/testdata/assertinputs/experimentWithLowerSLOs.yaml b/testdata/assertinputs/experimentWithLowerSLOs.yaml deleted file mode 100644 index 35773f728..000000000 --- a/testdata/assertinputs/experimentWithLowerSLOs.yaml +++ /dev/null @@ -1,172 +0,0 @@ -spec: - # task 1: generate HTTP requests for application URL - # collect Iter8's built-in HTTP latency and error-related metrics - - task: http - with: - duration: 2s - errorRanges: - - lower: 500 - url: https://httpbin.org/get - # task 2: validate service level objectives for app using - # the metrics collected in the above task - - task: assess - with: - SLOs: - Upper: - - metric: "http/error-rate" - limit: 0 - - metric: "http/latency-mean" - limit: 500 - - metric: "http/latency-p50" - limit: 1000 - - metric: "http/latency-p50.0" - limit: 1000 - - metric: "http/latency-p95.0" - limit: 2500 - - metric: "http/latency-p99" - limit: 5000 - Lower: - - metric: "user/engagement" - limit: 10000000 - # tasks 3 & 4: print if SLOs are satisfied or not - - if: SLOs() - run: echo "SLOs satisfied" - - if: not SLOs() - run: echo "SLOs not satisfied" -result: - failure: false - insights: - SLOs: - upper: - - metric: "http/error-rate" - limit: 0 - - metric: "http/latency-mean" - limit: 500 - - metric: "http/latency-p50" - limit: 1000 - - metric: "http/latency-p50.0" - limit: 1000 - - metric: "http/latency-p95.0" - limit: 2500 - - metric: "http/latency-p99" - limit: 5000 - lower: - - metric: "user/engagement" - limit: 10000000 - SLOsSatisfied: - upper: - - - true - - - true - - - true - - - true - - - true - - - true - lower: - - - true - histMetricValues: - - http/latency: - - count: 3 - lower: 11.388804 - upper: 12 - - count: 7 - lower: 12 - upper: 14 - - count: 4 - lower: 14 - upper: 16 - - count: 1 - lower: 16 - upper: 18.000000000000004 - - count: 1 - lower: 250 - upper: 272.838867 - metricsInfo: - http/error-count: - description: number of responses that were errors - type: Counter - http/error-rate: - description: fraction of responses that were errors - type: Gauge - http/latency: - description: Latency Histogram - type: Histogram - units: msec - http/latency-max: - description: maximum of observed latency values - type: Gauge - units: msec - http/latency-mean: - description: mean of observed latency values - type: Gauge - units: msec - http/latency-min: - description: minimum of observed latency values - type: Gauge - units: msec - http/latency-p50: - description: 50-th percentile of observed latency values - type: Gauge - units: msec - http/latency-p75: - description: 75-th 
percentile of observed latency values - type: Gauge - units: msec - http/latency-p90: - description: 90-th percentile of observed latency values - type: Gauge - units: msec - http/latency-p95: - description: 95-th percentile of observed latency values - type: Gauge - units: msec - http/latency-p99: - description: 99-th percentile of observed latency values - type: Gauge - units: msec - http/latency-p99.9: - description: 99.9-th percentile of observed latency values - type: Gauge - units: msec - http/latency-stddev: - description: standard deviation of observed latency values - type: Gauge - units: msec - http/request-count: - description: number of requests sent - type: Counter - user/engagement: - description: number of user engagements - type: Counter - nonHistMetricValues: - - http/error-count: - - 0 - http/error-rate: - - 0 - http/latency-max: - - 272.838867 - http/latency-mean: - - 29.624432499999998 - http/latency-min: - - 11.388804 - http/latency-p50: - - 13.428571428571429 - http/latency-p75: - - 15 - http/latency-p90: - - 16.8 - http/latency-p95: - - 254.56777339999996 - http/latency-p99: - - 269.18464828 - http/latency-p99.9: - - 272.473445128 - http/latency-stddev: - - 62.81583554772398 - http/request-count: - - 16 - user/engagement: - - 100000000 - numVersions: 1 - iter8Version: v0.13 - numCompletedTasks: 4 - startTime: "2022-03-16T10:22:58.540897-04:00" diff --git a/testdata/assertinputs/noinsights/experiment.yaml b/testdata/assertinputs/noinsights/experiment.yaml deleted file mode 100644 index 390f55985..000000000 --- a/testdata/assertinputs/noinsights/experiment.yaml +++ /dev/null @@ -1,37 +0,0 @@ -spec: - # task 1: generate HTTP requests for application URL - # collect Iter8's built-in HTTP latency and error-related metrics - - task: http - with: - duration: 2s - errorRanges: - - lower: 500 - url: https://httpbin.org/get - # task 2: validate service level objectives for app using - # the metrics collected in the above task - - task: assess - with: - SLOs: - Upper: - - metric: "http/error-rate" - limit: 0 - - metric: "http/latency-mean" - limit: 500 - - metric: "http/latency-p50" - limit: 1000 - - metric: "http/latency-p50.0" - limit: 1000 - - metric: "http/latency-p95.0" - limit: 2500 - - metric: "http/latency-p99" - limit: 5000 - # tasks 3 & 4: print if SLOs are satisfied or not - - if: SLOs() - run: echo "SLOs satisfied" - - if: not SLOs() - run: echo "SLOs not satisfied" -result: - failure: false - iter8Version: v0.13 - numCompletedTasks: 4 - startTime: "2022-03-16T10:22:58.540897-04:00" diff --git a/testdata/assertinputsfail/.gitignore b/testdata/assertinputsfail/.gitignore deleted file mode 100644 index 4e9ba03c6..000000000 --- a/testdata/assertinputsfail/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -!experiment.yaml -!result.yaml \ No newline at end of file diff --git a/testdata/assertinputsfail/experiment.yaml b/testdata/assertinputsfail/experiment.yaml deleted file mode 100644 index bfd0f2fdc..000000000 --- a/testdata/assertinputsfail/experiment.yaml +++ /dev/null @@ -1,159 +0,0 @@ -spec: - # task 1: generate HTTP requests for application URL - # collect Iter8's built-in HTTP latency and error-related metrics - - task: http - with: - duration: 2s - errorRanges: - - lower: 500 - url: https://httpbin.org/get - # task 2: validate service level objectives for app using - # the metrics collected in the above task - - task: assess - with: - SLOs: - Upper: - - metric: "http/error-rate" - limit: 0 - - metric: "http/latency-mean" - limit: 500 - - metric: "http/latency-p50" - 
limit: 1000 - - metric: "http/latency-p50.0" - limit: 1000 - - metric: "http/latency-p95.0" - limit: 2500 - - metric: "http/latency-p99" - limit: 5000 - # tasks 3 & 4: print if SLOs are satisfied or not - - if: SLOs() - run: echo "SLOs satisfied" - - if: not SLOs() - run: echo "SLOs not satisfied" -result: - failure: false - insights: - SLOs: - upper: - - metric: "http/error-rate" - limit: 0 - - metric: "http/latency-mean" - limit: 500 - - metric: "http/latency-p50" - limit: 1000 - - metric: "http/latency-p50.0" - limit: 1000 - - metric: "http/latency-p95.0" - limit: 2500 - - metric: "http/latency-p99" - limit: 5000 - SLOsSatisfied: - upper: - - - false - - - true - - - true - - - true - - - true - - - true - histMetricValues: - - http/latency: - - count: 3 - lower: 11.388804 - upper: 12 - - count: 7 - lower: 12 - upper: 14 - - count: 4 - lower: 14 - upper: 16 - - count: 1 - lower: 16 - upper: 18.000000000000004 - - count: 1 - lower: 250 - upper: 272.838867 - metricsInfo: - http/error-count: - description: number of responses that were errors - type: Counter - http/error-rate: - description: fraction of responses that were errors - type: Gauge - http/latency: - description: Latency Histogram - type: Histogram - units: msec - http/latency-max: - description: maximum of observed latency values - type: Gauge - units: msec - http/latency-mean: - description: mean of observed latency values - type: Gauge - units: msec - http/latency-min: - description: minimum of observed latency values - type: Gauge - units: msec - http/latency-p50: - description: 50-th percentile of observed latency values - type: Gauge - units: msec - http/latency-p75: - description: 75-th percentile of observed latency values - type: Gauge - units: msec - http/latency-p90: - description: 90-th percentile of observed latency values - type: Gauge - units: msec - http/latency-p95: - description: 95-th percentile of observed latency values - type: Gauge - units: msec - http/latency-p99: - description: 99-th percentile of observed latency values - type: Gauge - units: msec - http/latency-p99.9: - description: 99.9-th percentile of observed latency values - type: Gauge - units: msec - http/latency-stddev: - description: standard deviation of observed latency values - type: Gauge - units: msec - http/request-count: - description: number of requests sent - type: Counter - nonHistMetricValues: - - http/error-count: - - 0 - http/error-rate: - - 0 - http/latency-max: - - 272.838867 - http/latency-mean: - - 29.624432499999998 - http/latency-min: - - 11.388804 - http/latency-p50: - - 13.428571428571429 - http/latency-p75: - - 15 - http/latency-p90: - - 16.8 - http/latency-p95: - - 254.56777339999996 - http/latency-p99: - - 269.18464828 - http/latency-p99.9: - - 272.473445128 - http/latency-stddev: - - 62.81583554772398 - http/request-count: - - 16 - numVersions: 1 - iter8Version: v0.13 - numCompletedTasks: 4 - startTime: "2022-03-16T10:22:58.540897-04:00" diff --git a/testdata/autox_inputs/config.empty.yaml b/testdata/autox_inputs/config.empty.yaml deleted file mode 100644 index e69de29bb..000000000 diff --git a/testdata/autox_inputs/config.example.yaml b/testdata/autox_inputs/config.example.yaml deleted file mode 100644 index e4b62861e..000000000 --- a/testdata/autox_inputs/config.example.yaml +++ /dev/null @@ -1,28 +0,0 @@ -specs: - myApp: - trigger: - name: myApp - namespace: default - group: "apps" - version: "v1" - resource: "deployments" - releaseSpecs: - name1: - name: abc - version: 1.0.0 - name2: - name: def - values: - hello: 
world - version: 1.0.0 - myApp2: - trigger: - name: myApp2 - namespace: test - group: "apps" - version: "v1" - resource: "deployments" - releaseSpecs: - name3: - name: ghi - version: 1.0.0 diff --git a/testdata/autox_inputs/config.garbage.yaml b/testdata/autox_inputs/config.garbage.yaml deleted file mode 100644 index 8b0ce0c41..000000000 --- a/testdata/autox_inputs/config.garbage.yaml +++ /dev/null @@ -1 +0,0 @@ -invalid yaml file \ No newline at end of file diff --git a/testdata/autox_inputs/config.invalid.yaml b/testdata/autox_inputs/config.invalid.yaml deleted file mode 100644 index 7daacd5db..000000000 --- a/testdata/autox_inputs/config.invalid.yaml +++ /dev/null @@ -1 +0,0 @@ -foo: bar \ No newline at end of file diff --git a/testdata/config.yaml b/testdata/config.yaml deleted file mode 100644 index 90dce61ec..000000000 --- a/testdata/config.yaml +++ /dev/null @@ -1,6 +0,0 @@ -http: - url: "https://httpbin.org/get" -assess: - SLOs: - upper: - http/latency-mean: 500 \ No newline at end of file diff --git a/testdata/controllers/mirror/default-routing.sh b/testdata/controllers/mirror/default-routing.sh index 33214356e..8e1c28de5 100755 --- a/testdata/controllers/mirror/default-routing.sh +++ b/testdata/controllers/mirror/default-routing.sh @@ -49,7 +49,7 @@ metadata: labels: app.kubernetes.io/managed-by: iter8 iter8.tools/kind: routemap - iter8.tools/version: v0.15 + iter8.tools/version: v0.15 data: strSpec: | versions: diff --git a/testdata/custommetrics/istio-prom.tpl b/testdata/custommetrics/istio-prom.tpl deleted file mode 100644 index 0b8b1d818..000000000 --- a/testdata/custommetrics/istio-prom.tpl +++ /dev/null @@ -1,118 +0,0 @@ -# This file provides templated metric specifications that enable -# Iter8 to retrieve metrics from Istio's Prometheus add-on. -# -# For a list of metrics supported out-of-the-box by the Istio Prometheus add-on, -# please see https://istio.io/latest/docs/reference/config/metrics/ -# -# Iter8 substitutes the placeholders in this file with values, -# and uses the resulting metric specs to query Prometheus. -# The placeholders are as follows. -# -# labels map[string]interface{} optional -# elapsedTimeSeconds int implicit -# startingTime string optional -# latencyPercentiles []int optional -# -# labels: this is the set of Prometheus labels that will be used to identify a particular -# app version. These labels will be applied to every Prometheus query. To learn more -# about what labels you can use for Prometheus, please see -# https://istio.io/latest/docs/reference/config/metrics/#labels -# -# elapsedTimeSeconds: this should not be specified directly by the user. -# It is implicitly computed by Iter8 according to the following formula -# elapsedTimeSeconds := (time.Now() - startingTime).Seconds() -# -# startingTime: By default, this is the time at which the Iter8 experiment started. -# The user can explicitly specify the startingTime for each app version -# (for example, the user can set the startingTime to the creation time of the app version) -# -# latencyPercentiles: Each item in this slice will create a new metric spec. -# For example, if this is set to [50,75,90,95], -# then, latency-p50, latency-p75, latency-p90, latency-p95 metric specs are created. 
- -# -# For testing purposes, hardcoded elapsedTimeSeconds to be 0 -# - -{{- define "labels"}} -{{- range $key, $val := .labels }} -{{- if or (eq (kindOf $val) "slice") (eq (kindOf $val) "map")}} -{{- fail (printf "labels should be a primitive types but received: %s :%s" $key $val) }} -{{- end }} -{{- if eq $key "response_code"}} -{{- fail "labels should not contain 'response_code'" }} -{{- end }} - {{ $key }}="{{ $val }}", -{{- end }} -{{- end}} - -# url is the HTTP endpoint where the Prometheus service installed by Istio's Prom add-on -# can be queried for metrics - -url: {{ .istioPromURL | default "http://prometheus.istio-system:9090/api/v1/query" }} -provider: istio-prom -method: GET -metrics: -- name: request-count - type: counter - description: | - Number of requests - params: - - name: query - value: | - sum(last_over_time(istio_requests_total{ - {{- template "labels" . }} - }[0s])) or on() vector(0) - jqExpression: .data.result[0].value[1] | tonumber -- name: error-count - type: counter - description: | - Number of unsuccessful requests - params: - - name: query - value: | - sum(last_over_time(istio_requests_total{ - response_code=~'5..', - {{- template "labels" . }} - }[0s])) or on() vector(0) - jqExpression: .data.result[0].value[1] | tonumber -- name: error-rate - type: gauge - description: | - Fraction of unsuccessful requests - params: - - name: query - value: | - (sum(last_over_time(istio_requests_total{ - response_code=~'5..', - {{- template "labels" . }} - }[0s])) or on() vector(0))/(sum(last_over_time(istio_requests_total{ - {{- template "labels" . }} - }[0s])) or on() vector(0)) - jqExpression: .data.result.[0].value.[1] | tonumber -- name: latency-mean - type: gauge - description: | - Mean latency - params: - - name: query - value: | - (sum(last_over_time(istio_request_duration_milliseconds_sum{ - {{- template "labels" . }} - }[0s])) or on() vector(0))/(sum(last_over_time(istio_requests_total{ - {{- template "labels" . }} - }[0s])) or on() vector(0)) - jqExpression: .data.result[0].value[1] | tonumber -{{- range $i, $p := .latencyPercentiles }} -- name: latency-p{{ $p }} - type: gauge - description: | - {{ $p }} percentile latency - params: - - name: query - value: | - histogram_quantile(0.{{ $p }}, sum(rate(istio_request_duration_milliseconds_bucket{ - {{- template "labels" $ }} - }[0s])) by (le)) - jqExpression: .data.result[0].value[1] | tonumber -{{- end }} \ No newline at end of file diff --git a/testdata/custommetrics/nan.tpl b/testdata/custommetrics/nan.tpl deleted file mode 100644 index c59103d4e..000000000 --- a/testdata/custommetrics/nan.tpl +++ /dev/null @@ -1,20 +0,0 @@ -url: http://url/query -provider: nan-prom -method: GET -metrics: - -- name: metric-tonumber - type: counter - description: tonumber - params: - - name: query - value: query-tonumber - jqExpression: .value | tonumber - -- name: metric-no-tonumber - type: counter - description: no-tonumber - params: - - name: query - value: query-no-tonumber - jqExpression: .value diff --git a/testdata/custommetrics/test-ce.tpl b/testdata/custommetrics/test-ce.tpl deleted file mode 100644 index d2ee026bf..000000000 --- a/testdata/custommetrics/test-ce.tpl +++ /dev/null @@ -1,82 +0,0 @@ -# endpoint where the monitoring instance is available -# https://cloud.ibm.com/docs/monitoring?topic=monitoring-endpoints#endpoints_sysdig -url: test-database.com/prometheus/api/v1/query # e.g. 
https://ca-tor.monitoring.cloud.ibm.com -headers: - # IAM token - # to get the token, run: ibmcloud iam oauth-tokens | grep IAM | cut -d \: -f 2 | sed 's/^ *//' - Authorization: Bearer test-token - # GUID of the IBM Cloud Monitoring instance - # to get the GUID, run: ibmcloud resource service-instance --output json | jq -r '.[].guid' - # https://cloud.ibm.com/docs/monitoring?topic=monitoring-mon-curl - IBMInstanceID: test-guid -provider: test-ce -method: GET -# Inputs for the template: -# ibm_codeengine_application_name string -# ibm_codeengine_gateway_instance string -# ibm_codeengine_namespace string -# ibm_codeengine_project_name string -# ibm_codeengine_revision_name string -# ibm_codeengine_status string -# ibm_ctype string -# ibm_location string -# ibm_scope string -# ibm_service_instance string -# ibm_service_name string -# -# Inputs for the metrics (output of template): -# ibm_codeengine_revision_name string -# startingTime string -# -# Note: elapsedTimeSeconds is produced by Iter8 - -# -# For testing purposes hardcoded elapsedTimeSeconds to 0 -# - -metrics: -- name: request-count - type: counter - description: | - Number of requests - params: - - name: query - value: | - sum(last_over_time(ibm_codeengine_application_requests_total{ - {{- if .ibm_codeengine_revision_name }} - ibm_codeengine_revision_name="{{.ibm_codeengine_revision_name}}", - {{- end }} - }[0s])) or on() vector(0) - jqExpression: .data.result[0].value[1] | tonumber -- name: error-count - type: counter - description: | - Number of non-successful requests - params: - - name: query - value: | - sum(last_over_time(ibm_codeengine_application_requests_total{ - ibm_codeengine_status!="200", - {{- if .ibm_codeengine_revision_name }} - ibm_codeengine_revision_name="{{.ibm_codeengine_revision_name}}", - {{- end }} - }[0s])) or on() vector(0) - jqExpression: .data.result[0].value[1] | tonumber -- name: error-rate - type: gauge - description: | - Percentage of non-successful requests - params: - - name: query - value: | - sum(last_over_time(ibm_codeengine_application_requests_total{ - ibm_codeengine_status!="200", - {{- if .ibm_codeengine_revision_name }} - ibm_codeengine_revision_name="{{.ibm_codeengine_revision_name}}", - {{- end }} - }[0s])) or on() vector(0)/sum(last_over_time(ibm_codeengine_application_requests_total{ - {{- if .ibm_codeengine_revision_name }} - ibm_codeengine_revision_name="{{.ibm_codeengine_revision_name}}", - {{- end }} - }[0s])) or on() vector(0) - jqExpression: .data.result.[0].value.[1] | tonumber \ No newline at end of file diff --git a/testdata/custommetrics/test-request-body.tpl b/testdata/custommetrics/test-request-body.tpl deleted file mode 100644 index ac5dd1292..000000000 --- a/testdata/custommetrics/test-request-body.tpl +++ /dev/null @@ -1,16 +0,0 @@ -url: test-database.com/prometheus/api/v1/query -provider: test-request-body -method: GET -# Note: elapsedTimeSeconds is produced by Iter8 -metrics: -- name: request-count - type: counter - description: | - Number of requests - body: | - example request body - params: - - name: query - value: | - example query parameter - jqExpression: .data.result[0].value[1] | tonumber \ No newline at end of file diff --git a/testdata/drivertests/experiment.tpl b/testdata/drivertests/experiment.tpl index de5575b44..23afa3a53 100644 --- a/testdata/drivertests/experiment.tpl +++ b/testdata/drivertests/experiment.tpl @@ -1,32 +1,12 @@ +metadata: + name: myName + namespace: myNamespace spec: - # task 1: generate HTTP requests for application URL - # collect 
Iter8's built-in HTTP latency and error-related metrics - - task: http - with: - duration: 2s - errorRanges: - - lower: 500 - url: {{ .URL }} - # task 2: validate service level objectives for app using - # the metrics collected in the above task - - task: assess - with: - SLOs: - upper: - - metric: "http/error-rate" - limit: 0 - - metric: "http/latency-mean" - limit: 500 - - metric: "http/latency-p50" - limit: 1000 - - metric: "http/latency-p50.0" - limit: 1000 - - metric: "http/latency-p95.0" - limit: 2500 - - metric: "http/latency-p99" - limit: 5000 - # tasks 3 & 4: print if SLOs are satisfied or not - - if: SLOs() - run: echo "SLOs satisfied" - - if: not SLOs() - run: echo "SLOs not satisfied" +# task 1: generate HTTP requests for application URL +# collect Iter8's built-in HTTP latency and error-related metrics +- task: http + with: + duration: 2s + errorRanges: + - lower: 500 + url: {{ .URL }} diff --git a/testdata/experiment.tpl b/testdata/experiment.tpl index adc7023d8..4075144bf 100644 --- a/testdata/experiment.tpl +++ b/testdata/experiment.tpl @@ -1,32 +1,12 @@ +metadata: + name: myName + namespace: myNamespace spec: - # task 1: generate HTTP requests for application URL - # collect Iter8's built-in HTTP latency and error-related metrics - - task: http - with: - duration: 2s - errorRanges: - - lower: 500 - url: {{ .URL }} - # task 2: validate service level objectives for app using - # the metrics collected in the above task - - task: assess - with: - SLOs: - upper: - - metric: "http/error-rate" - limit: 0 - - metric: "http/latency-mean" - limit: 5000 - - metric: "http/latency-p50" - limit: 5000 - - metric: "http/latency-p50.0" - limit: 5000 - - metric: "http/latency-p95.0" - limit: 5000 - - metric: "http/latency-p99" - limit: 5000 - # tasks 3 & 4: print if SLOs are satisfied or not - - if: SLOs() - run: echo "SLOs satisfied" - - if: not SLOs() - run: echo "SLOs not satisfied" +# task 1: generate HTTP requests for application URL +# collect Iter8's built-in HTTP latency and error-related metrics +- task: http + with: + duration: 2s + errorRanges: + - lower: 500 + url: {{ .URL }} \ No newline at end of file diff --git a/testdata/experiment.yaml b/testdata/experiment.yaml index 18df69f27..36b734050 100644 --- a/testdata/experiment.yaml +++ b/testdata/experiment.yaml @@ -1,32 +1,9 @@ spec: - # task 1: generate HTTP requests for application URL - # collect Iter8's built-in HTTP latency and error-related metrics - - task: http - with: - duration: 2s - errorRanges: - - lower: 500 - url: https://httpbin.org/get - # task 2: validate service level objectives for app using - # the metrics collected in the above task - - task: assess - with: - SLOs: - upper: - - metric: "http/error-rate" - limit: 0 - - metric: "http/latency-mean" - limit: 5000 - - metric: "http/latency-p50" - limit: 5000 - - metric: "http/latency-p50.0" - limit: 5000 - - metric: "http/latency-p95.0" - limit: 5000 - - metric: "http/latency-p99" - limit: 5000 - # tasks 3 & 4: print if SLOs are satisfied or not - - if: SLOs() - run: echo "SLOs satisfied" - - if: not SLOs() - run: echo "SLOs not satisfied" +# task 1: generate HTTP requests for application URL +# collect Iter8's built-in HTTP latency and error-related metrics +- task: http + with: + duration: 2s + errorRanges: + - lower: 500 + url: https://httpbin.org/get diff --git a/testdata/experiment_db.yaml b/testdata/experiment_db.yaml deleted file mode 100644 index e788c197b..000000000 --- a/testdata/experiment_db.yaml +++ /dev/null @@ -1,22 +0,0 @@ -spec: - # task 1: 
collect custom metrics - - task: custommetrics - with: - templates: - kfserving: https://raw.githubusercontent.com/iter8-tools/iter8/master/testdata/metrics/kfserving.metrics.yaml - values: - namespace_name: ns-candidate - startingTime: Jan 2, 2006 at 3:04pm (MST) - # task 2: validate service level objectives for app using - # the metrics collected in the above task - - task: assess - with: - SLOs: - Upper: - - metric: "kfserving/request-count" - limit: 0 - # tasks 3 & 4: print if SLOs are satisfied or not - - if: SLOs() - run: echo "SLOs satisfied" - - if: not SLOs() - run: echo "SLOs not satisfied" diff --git a/testdata/experiment_fails.tpl b/testdata/experiment_fails.tpl deleted file mode 100644 index 87347fe90..000000000 --- a/testdata/experiment_fails.tpl +++ /dev/null @@ -1,32 +0,0 @@ -spec: - # task 1: generate HTTP requests for application URL - # collect Iter8's built-in HTTP latency and error-related metrics - - task: http - with: - duration: 2s - errorRanges: - - lower: 500 - url: {{ .URL }} - # task 2: validate service level objectives for app using - # the metrics collected in the above task - - task: assess - with: - SLOs: - upper: - - metric: "http/error-rate" - limit: 0 - - metric: "http/latency-mean" - limit: 500 - - metric: "http/latency-p50" - limit: 1000 - - metric: "http/latency-p50.0" - limit: 1 - - metric: "http/latency-p95.0" - limit: 2500 - - metric: "http/latency-p99" - limit: 5000 - # tasks 3 & 4: print if SLOs are satisfied or not - - if: SLOs() - run: echo "SLOs satisfied" - - if: not SLOs() - run: echo "SLOs not satisfied" diff --git a/testdata/experiment_grpc.yaml b/testdata/experiment_grpc.yaml index 49d901ec6..60bc0169d 100644 --- a/testdata/experiment_grpc.yaml +++ b/testdata/experiment_grpc.yaml @@ -1,35 +1,14 @@ spec: - # task 1: generate gRPC requests for application - # collect Iter8's built-in gRPC latency and error-related metrics - - task: grpc - with: - total: 200 - concurrency: 5 - data: - name: bob - timeout: 10s - connect-timeeout: 5s - protoURL: "https://raw.githubusercontent.com/bojand/ghz/v0.105.0/testdata/greeter.proto" - call: "helloworld.Greeter.SayHello" - host: "127.0.0.1" - # task 2: validate service level objectives for app using - # the metrics collected in the above task - - task: assess - with: - SLOs: - Upper: - - metric: "grpc/error-rate" - limit: 0 - - metric: "grpc/latency/mean" - limit: 100 - - metric: "grpc/latency/p50" - limit: 100 - - metric: "grpc/latency/p50.0" - limit: 100 - - metric: "grpc/latency/p95.0" - limit: 250 - - metric: "grpc/latency/p97.5" - limit: 500 - # task 3: conditional run task - - if: SLOs() - run: echo "all good" +# task 1: generate gRPC requests for application +# collect Iter8's built-in gRPC latency and error-related metrics +- task: grpc + with: + total: 200 + concurrency: 5 + data: + name: bob + timeout: 10s + connect-timeout: 5s + protoURL: "https://raw.githubusercontent.com/bojand/ghz/v0.105.0/testdata/greeter.proto" + call: "helloworld.Greeter.SayHello" + host: "127.0.0.1" \ No newline at end of file diff --git a/testdata/output/assert-slos.txt b/testdata/output/assert-slos.txt deleted file mode 100644 index 507879b72..000000000 --- a/testdata/output/assert-slos.txt +++ /dev/null @@ -1,4 +0,0 @@ -time=1977-09-02 22:04:05 level=info msg=experiment completed -time=1977-09-02 22:04:05 level=info msg=experiment has no failure -time=1977-09-02 22:04:05 level=info msg=SLOs are satisfied -time=1977-09-02 22:04:05 level=info msg=all conditions were satisfied diff --git a/testdata/output/kassert.txt 
b/testdata/output/kassert.txt index 507879b72..63684dab8 100644 --- a/testdata/output/kassert.txt +++ b/testdata/output/kassert.txt @@ -1,4 +1,3 @@ time=1977-09-02 22:04:05 level=info msg=experiment completed time=1977-09-02 22:04:05 level=info msg=experiment has no failure -time=1977-09-02 22:04:05 level=info msg=SLOs are satisfied time=1977-09-02 22:04:05 level=info msg=all conditions were satisfied diff --git a/testdata/output/kassertfails.txt b/testdata/output/kassertfails.txt deleted file mode 100644 index e180b06b0..000000000 --- a/testdata/output/kassertfails.txt +++ /dev/null @@ -1,9 +0,0 @@ -time=1977-09-02 22:04:05 level=info msg=experiment completed -time=1977-09-02 22:04:05 level=info msg=experiment has no failure -time=1977-09-02 22:04:05 level=info msg=SLOs are not satisfied -time=1977-09-02 22:04:05 level=info msg=experiment completed -time=1977-09-02 22:04:05 level=info msg=experiment has no failure -time=1977-09-02 22:04:05 level=info msg=SLOs are not satisfied -time=1977-09-02 22:04:05 level=info msg=not all conditions were satisfied -time=1977-09-02 22:04:05 level=error msg=assert conditions failed -time=1977-09-02 22:04:05 level=error msg=assert conditions failed diff --git a/testdata/output/kreport.txt b/testdata/output/kreport.txt deleted file mode 100644 index f7f0f6603..000000000 --- a/testdata/output/kreport.txt +++ /dev/null @@ -1,43 +0,0 @@ - -Experiment summary: -******************* - - Experiment completed: true - No task failures: true - Total number of tasks: 4 - Number of completed tasks: 4 - Number of completed loops: 0 - -Whether or not service level objectives (SLOs) are satisfied: -************************************************************* - - SLO Conditions | Satisfied - -------------- | --------- - http/error-rate <= 0 | true - http/latency-mean (msec) <= 500 | true - http/latency-p50 (msec) <= 1000 | true - http/latency-p50 (msec) <= 1000 | true - http/latency-p95 (msec) <= 2500 | true - http/latency-p99 (msec) <= 5000 | true - - -Latest observed values for metrics: -*********************************** - - Metric | value - ------- | ----- - http/error-count | 0.00 - http/error-rate | 0.00 - http/latency-max (msec) | 272.84 - http/latency-mean (msec) | 29.62 - http/latency-min (msec) | 11.39 - http/latency-p50 (msec) | 13.43 - http/latency-p75 (msec) | 15.00 - http/latency-p90 (msec) | 16.80 - http/latency-p95 (msec) | 254.57 - http/latency-p99 (msec) | 269.18 - http/latency-p99.9 (msec) | 272.47 - http/latency-stddev (msec) | 62.82 - http/request-count | 16.00 - - diff --git a/testdata/output/krun.txt b/testdata/output/krun.txt index 08360495e..a5fc32559 100644 --- a/testdata/output/krun.txt +++ b/testdata/output/krun.txt @@ -1,9 +1,2 @@ time=1977-09-02 22:04:05 level=info msg=task 1: http: started time=1977-09-02 22:04:05 level=info msg=task 1: http: completed -time=1977-09-02 22:04:05 level=info msg=task 2: assess: started -time=1977-09-02 22:04:05 level=info msg=task 2: assess: completed -time=1977-09-02 22:04:05 level=info msg=task 3: run: started -time=1977-09-02 22:04:05 level=info msg=task 3: run: completed -time=1977-09-02 22:04:05 level=info msg=task 4: run: started -time=1977-09-02 22:04:05 level=info msg=task 4: run: skipped stack-trace=below ... 
-::Trace:: false condition: not SLOs() diff --git a/testdata/output/launch-with-slos.txt b/testdata/output/launch-with-slos.txt deleted file mode 100644 index e89d48487..000000000 --- a/testdata/output/launch-with-slos.txt +++ /dev/null @@ -1,6 +0,0 @@ -time=1977-09-02 22:04:05 level=info msg=created experiment.yaml file -time=1977-09-02 22:04:05 level=info msg=starting local experiment -time=1977-09-02 22:04:05 level=info msg=task 1: http: started -time=1977-09-02 22:04:05 level=info msg=task 1: http: completed -time=1977-09-02 22:04:05 level=info msg=task 2: assess: started -time=1977-09-02 22:04:05 level=info msg=task 2: assess: completed diff --git a/testdata/output/report.txt b/testdata/output/report.txt deleted file mode 100644 index ef137893e..000000000 --- a/testdata/output/report.txt +++ /dev/null @@ -1,42 +0,0 @@ - -Experiment summary: -******************* - - Experiment completed: true - No task failures: true - Total number of tasks: 4 - Number of completed tasks: 4 - -Whether or not service level objectives (SLOs) are satisfied: -************************************************************* - - SLO Conditions | Satisfied - -------------- | --------- - http/error-rate <= 0 | true - http/latency-mean (msec) <= 500 | true - http/latency-p50 (msec) <= 1000 | true - http/latency-p50 (msec) <= 1000 | true - http/latency-p95 (msec) <= 2500 | true - http/latency-p99 (msec) <= 5000 | true - - -Latest observed values for metrics: -*********************************** - - Metric | value - ------- | ----- - http/error-count | 0.00 - http/error-rate | 0.00 - http/latency-max (msec) | 272.84 - http/latency-mean (msec) | 29.62 - http/latency-min (msec) | 11.39 - http/latency-p50 (msec) | 13.43 - http/latency-p75 (msec) | 15.00 - http/latency-p90 (msec) | 16.80 - http/latency-p95 (msec) | 254.57 - http/latency-p99 (msec) | 269.18 - http/latency-p99.9 (msec) | 272.47 - http/latency-stddev (msec) | 62.82 - http/request-count | 16.00 - - diff --git a/testdata/output/run.txt b/testdata/output/run.txt deleted file mode 100644 index 08360495e..000000000 --- a/testdata/output/run.txt +++ /dev/null @@ -1,9 +0,0 @@ -time=1977-09-02 22:04:05 level=info msg=task 1: http: started -time=1977-09-02 22:04:05 level=info msg=task 1: http: completed -time=1977-09-02 22:04:05 level=info msg=task 2: assess: started -time=1977-09-02 22:04:05 level=info msg=task 2: assess: completed -time=1977-09-02 22:04:05 level=info msg=task 3: run: started -time=1977-09-02 22:04:05 level=info msg=task 3: run: completed -time=1977-09-02 22:04:05 level=info msg=task 4: run: started -time=1977-09-02 22:04:05 level=info msg=task 4: run: skipped stack-trace=below ... -::Trace:: false condition: not SLOs()