|
#!/bin/bash

# Strict mode: abort on command failure (errexit), on use of unset variables
# (nounset), and on failure of any pipeline stage (pipefail). Pathname
# expansion is switched off as well (noglob).
set -o errexit -o nounset -o noglob -o pipefail

# Compose file describing the stack under test.
compose_file="docker-compose/monitor/docker-compose.yml"
# Overall time budget, in seconds, and the matching deadline expressed as a
# value of $SECONDS; the polling loops below compare $SECONDS to end_time.
timeout=300
end_time=$((timeout + SECONDS))
# Switched to "true" by main once every check has passed; read at teardown.
success=false
| 9 | + |
#######################################
# Polls a URL until it answers HTTP 200 or the global deadline passes.
# Globals:   end_time (read) - value of $SECONDS at which polling stops
# Arguments: $1 - human-readable service name used in log messages
#            $2 - URL to probe
# Outputs:   progress and result messages on stdout
# Returns:   0 once the URL returns 200, 1 if the deadline is reached first
#######################################
check_service_health() {
  local service_name=$1
  local url=$2
  echo "Checking health of service: $service_name at $url"

  local wait_seconds=10
  # --max-time bounds every single probe so that a connection which is
  # accepted but never answered cannot stall the loop past the deadline.
  local curl_params=(
    --silent
    --output /dev/null
    --write-out "%{http_code}"
    --max-time "$wait_seconds"
  )
  local http_code
  while [[ "$SECONDS" -lt "$end_time" ]]; do
    # curl exits non-zero while the service is still unreachable; only the
    # reported status code matters here, so its exit status is ignored.
    http_code=$(curl "${curl_params[@]}" "${url}") || true
    if [[ "$http_code" == "200" ]]; then
      echo "✅ $service_name is healthy"
      return 0
    fi
    echo "Waiting for $service_name to be healthy..."
    sleep "$wait_seconds"
  done

  echo "❌ ERROR: $service_name did not become healthy in time"
  return 1
}
| 35 | + |
# Blocks until every HTTP endpoint of the stack answers, probing them one
# after another through check_service_health.
wait_for_services() {
  echo "Waiting for services to be up and running..."

  # Flat list of (name, url) pairs, probed in this order.
  # Grafana is not actually important for the functional test,
  # but it at least validates that the docker-compose file is correct.
  local endpoints=(
    "Jaeger" "http://localhost:16686"
    "Prometheus" "http://localhost:9090/graph"
    "Grafana" "http://localhost:3000"
  )

  local i
  for ((i = 0; i < ${#endpoints[@]}; i += 2)); do
    check_service_health "${endpoints[i]}" "${endpoints[i + 1]}"
  done
}
| 45 | + |
#######################################
# Validates the call-rate metrics that the Jaeger metrics API reports for
# one service.
# Globals:   none
# Arguments: $1 - name of the service to query
# Outputs:   status messages on stdout
# Returns:   0 when the response is labelled with the requested service and
#            holds at least 3 data points, all greater than zero;
#            1 otherwise (request failed, no metrics yet, a zero value, or
#            too few data points)
#######################################
validate_service_metrics() {
  local service=$1
  # Time constants in milliseconds
  local fiveMinutes=300000
  local oneMinute=60000
  local fifteenSec=15000 # Prometheus is also configured to scrape every 15sec.
  # When endTs=(blank) the server will default it to now().
  local url="http://localhost:16686/api/metrics/calls?service=${service}&endTs=&lookback=${fiveMinutes}&step=${fifteenSec}&ratePer=${oneMinute}"

  # Everything below is scratch state; keep it out of the global scope.
  local response service_name value
  local -a metric_points=()

  # Bound the request and treat a transport failure as "not ready yet",
  # independent of whether the caller runs this under errexit.
  if ! response=$(curl -s --max-time 30 "$url"); then
    echo "⏳ Metrics API request failed for service '$service'"
    return 1
  fi

  # An unparsable response yields an empty name and is reported as "no metrics".
  service_name=$(jq -r 'if .metrics and .metrics[0] then .metrics[0].labels[] | select(.name=="service_name") | .value else empty end' <<<"$response") || service_name=""
  if [[ "$service_name" != "$service" ]]; then
    echo "⏳ No metrics found for service '$service'"
    return 1
  fi

  # Store the values in an array
  mapfile -t metric_points < <(jq -r '.metrics[0].metricPoints[].gaugeValue.doubleValue' <<<"$response")
  echo "Metric datapoints found for service '$service': " "${metric_points[@]}"

  # Check that all values are non-zero
  local non_zero_count=0
  for value in "${metric_points[@]}"; do
    # bc prints 1 when the comparison holds; anything else counts as a failure.
    if [[ $(echo "$value > 0.0" | bc) == "1" ]]; then
      non_zero_count=$((non_zero_count + 1))
    else
      echo "❌ ERROR: Zero values not expected"
      return 1
    fi
  done
  if [[ "$non_zero_count" -lt 3 ]]; then
    echo "⏳ Expecting at least 3 non-zero data points"
    return 1
  fi
  return 0
}
| 80 | + |
#######################################
# Waits until validate_service_metrics succeeds for every expected service.
# Globals:   end_time (read) - value of $SECONDS at which polling stops;
#            the deadline is shared by all services, not reset per service
# Arguments: none
# Outputs:   progress and result messages on stdout
# Returns:   0 when every service reported metrics; otherwise the function
#            terminates the shell with exit status 1
#######################################
check_spm() {
  local wait_seconds=10
  local successful_service=0
  local service found
  local services_list=("driver" "customer" "mysql" "redis" "frontend" "route" "ui")
  for service in "${services_list[@]}"; do
    echo "Processing service: $service"
    found="false"
    while [[ "$SECONDS" -lt "$end_time" ]]; do
      if validate_service_metrics "$service"; then
        echo "✅ Found all expected metrics for service '$service'"
        successful_service=$((successful_service + 1))
        found="true"
        break
      fi
      sleep "$wait_seconds"
    done
    # Name the service that ran out of time so the final count can be traced.
    if [[ "$found" == "false" ]]; then
      echo "❌ ERROR: Timed out waiting for metrics from service '$service'"
    fi
  done
  if [[ "$successful_service" -lt "${#services_list[@]}" ]]; then
    echo "❌ ERROR: Expected metrics from ${#services_list[@]} services, found only ${successful_service}"
    exit 1
  else
    echo "✅ All services metrics are returned by the API"
  fi
}
| 103 | + |
#######################################
# Prints the logs of all compose services between "::group::" and
# "::endgroup::" marker lines.
# Globals:   compose_file (read) - path of the compose file
# Arguments: none
# Outputs:   the marker lines and the compose logs on stdout; a warning on
#            stderr when the logs cannot be collected
# Returns:   0 (log collection is best-effort)
#######################################
dump_logs() {
  echo "::group:: docker logs"
  # Quoted so a path containing whitespace stays a single argument.
  # A failure here must neither skip the closing marker nor, under errexit,
  # abort whatever cleanup the caller still has to do.
  docker compose -f "$compose_file" logs \
    || echo "warning: could not collect docker compose logs" >&2
  echo "::endgroup::"
}
| 109 | + |
#######################################
# Stops the compose stack, dumping its logs first when the run did not succeed.
# Globals:   success (read)      - "false" triggers the log dump
#            compose_file (read) - path of the compose file
# Arguments: none
# Outputs:   whatever dump_logs and docker compose print
# Returns:   status of `docker compose down`
#######################################
teardown_services() {
  if [[ "$success" == "false" ]]; then
    # Log collection is diagnostic only: a failure there must not prevent
    # the stack from being shut down when errexit is active.
    dump_logs || true
  fi
  # Quoted so a path containing whitespace stays a single argument.
  docker compose -f "$compose_file" down
}
| 116 | + |
# Entry point: builds and starts the monitor stack, waits for its endpoints,
# then runs the SPM metric checks. The global `success` becomes "true" only
# after all of these steps have completed; teardown_services reads it.
main() {
  # Runs in a subshell so the directory change does not affect the rest of
  # the script. NOTE(review): DOCKER_COMPOSE_ARGS="-d" presumably starts the
  # stack detached - confirm against the Makefile in docker-compose/monitor.
  (
    cd docker-compose/monitor \
      && make build \
      && make dev DOCKER_COMPOSE_ARGS="-d"
  )

  wait_for_services
  check_spm

  success="true"
}
| 123 | + |
# Always tear the stack down when the script exits, whatever the reason.
trap teardown_services EXIT
# On Ctrl-C, exit right away. A plain handler on INT would return into the
# interrupted polling loop after tearing the stack down, and teardown would
# then run a second time at exit. Exiting here fires the EXIT trap above, so
# teardown runs exactly once; 130 is the conventional 128+SIGINT status.
trap 'exit 130' INT

# Run the main function
main
0 commit comments