Skip to content

Commit 75b78cd

Browse files
[ci] Scrape and verify metrics at the end of e2e tests (jaegertracing#6330)
## Which problem is this PR solving? - Part of jaegertracing#6278 ## Description of the changes - scrape script and usage in cit workflow - diff calculating script - cache save and restore from main workflow runs - A sample diff (txt) ``` --- +++ @@ -303,2 +303,2 @@ -rpc_server_requests_per_rpc{le="+Inf",rpc_grpc_status_code="0",rpc_method="Export",rpc_service="opentelemetry.proto.collector.trace.v1.TestService",rpc_system="grpc",service_name="jaeger",service_version=""} -rpc_server_requests_per_rpc{le="0",rpc_grpc_status_code="0",rpc_method="Export",rpc_service="opentelemetry.proto.collector.trace.v1.TraceService",rpc_system="grpc",service_name="jaeger",service_version=""} +rpc_server_requests_per_rpc{le="+Inf",rpc_grpc_status_code="0",rpc_method="Export",rpc_service="opentelemetry.proto.collector.trace.v1.TraceService",rpc_system="grpc",service_name="jaeger",service_version=""} +rpc_server_requests_per_rpc{le="0",rpc_grpc_status_code="1",rpc_method="Export",rpc_service="opentelemetry.proto.collector.trace.v1.TraceService",rpc_system="grpc",service_name="jaeger",service_version=""} @@ -321 +321 @@ -rpc_server_response_size{le="+Inf",rpc_method="Export",rpc_service="opentelemetry.proto.collector.trace.v1.TraceService",rpc_system="grpc",service_name="jaeger",service_version=""} +rpc_server_response_size{le="+Inf",rpc_service="opentelemetry.proto.collector.trace.v1.TraceService",rpc_system="grpc",service_name="jaeger",service_version="",test_change="Export"} @@ -338 +338 @@ -rpc_server_response_size{rpc_method="Export",rpc_service="opentelemetry.proto.collector.trace.v1.TraceService",rpc_system="grpc",service_name="jaeger",service_version=""} +rpc_server_response_size{rpc_method="Export",rpc_service="opentelemetry.proto.collector.trace.v1.TraceService",rpc_system="grpc",service_name="test-jaeger",service_version=""} ``` ## How was this change tested? 
- ## Checklist - [x] I have read https://github.com/jaegertracing/jaeger/blob/master/CONTRIBUTING_GUIDELINES.md - [x] I have signed all commits - [ ] I have added unit tests for the new functionality - [x] I have run lint and test steps successfully - for `jaeger`: `make lint test` - for `jaeger-ui`: `yarn lint` and `yarn test` --------- Signed-off-by: chahatsagarmain <[email protected]> Signed-off-by: chahat sagar <[email protected]> Signed-off-by: Yuri Shkuro <[email protected]> Co-authored-by: Yuri Shkuro <[email protected]>
1 parent 2ee8e4c commit 75b78cd

12 files changed

+215
-16
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# Copyright (c) 2023 The Jaeger Authors.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
name: 'Verify Metric Snapshot and Upload Metrics'
5+
description: 'Upload or cache the metrics data after verification'
6+
inputs:
7+
snapshot:
8+
description: 'Path to the metric file'
9+
required: true
10+
artifact_key:
11+
description: 'Artifact key used for uploading and fetching artifacts'
12+
required: true
13+
runs:
14+
using: 'composite'
15+
steps:
16+
- name: Upload current metrics snapshot
17+
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
18+
with:
19+
name: ${{ inputs.artifact_key }}
20+
path: ./.metrics/${{ inputs.snapshot }}.txt
21+
retention-days: 7
22+
23+
# The github cache restore successfully restores when cache saved has same key and same path.
24+
# Hence, to restore the release metric with name release_{metric_name}, the name must be changed to the same.
25+
- name: Change file name before caching
26+
if: github.ref_name == 'main'
27+
shell: bash
28+
run: |
29+
mv ./.metrics/${{ inputs.snapshot }}.txt ./.metrics/baseline_${{ inputs.snapshot }}.txt
30+
31+
- name: Cache metrics snapshot on main branch for longer retention
32+
if: github.ref_name == 'main'
33+
uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57
34+
with:
35+
path: ./.metrics/baseline_${{ inputs.snapshot }}.txt
36+
key: ${{ inputs.artifact_key }}_${{ github.run_id }}
37+
38+
# Use restore keys to match prefix and fetch the latest cache
39+
# Here, restore-keys is an ordered list of prefixes that need to be matched
40+
- name: Download the cached tagged metrics
41+
id: download-release-snapshot
42+
if: github.ref_name != 'main'
43+
uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57
44+
with:
45+
path: ./.metrics/baseline_${{ inputs.snapshot }}.txt
46+
key: ${{ inputs.artifact_key }}
47+
restore-keys: |
48+
${{ inputs.artifact_key }}
49+
50+
# Compare the freshly scraped snapshot with the baseline restored from the cache.
# The step fails whenever compare_metrics.py exits with a non-zero status.
- name: Calculate diff between the snapshots
  id: compare-snapshots
  if: ${{ (github.ref_name != 'main') && (steps.download-release-snapshot.outputs.cache-matched-key != '') }}
  shell: bash
  run: |
    python3 -m pip install prometheus-client
    # `shell: bash` steps are executed with `-e`, so a bare failing command would
    # abort the script before `$?` could be inspected. Capture the status instead.
    status=0
    python3 ./scripts/e2e/compare_metrics.py --file1 ./.metrics/${{ inputs.snapshot }}.txt --file2 ./.metrics/baseline_${{ inputs.snapshot }}.txt --output ./.metrics/diff_${{ inputs.snapshot }}.txt || status=$?
    if [ "$status" -eq 1 ]; then
      echo "🛑 Differences found in metrics"
    fi
    # Propagate any failure (differences or a script error) to the step outcome.
    exit "$status"
61+
62+
# An `if:` without a status-check function is implicitly combined with success(),
# which would skip this step exactly when the comparison step has failed.
# failure() lets it run after that failure so the diff can still be uploaded.
- name: Upload the diff artifact
  if: ${{ failure() && (github.ref_name != 'main') && (steps.compare-snapshots.outcome == 'failure') }}
  uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
  with:
    name: diff_${{ inputs.artifact_key }}
    path: ./.metrics/diff_${{ inputs.snapshot }}.txt
    retention-days: 7
69+
70+

.github/workflows/ci-e2e-all.yml

-4
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,3 @@ jobs:
3636
opensearch:
3737
uses: ./.github/workflows/ci-e2e-opensearch.yml
3838

39-
40-
41-
42-

.github/workflows/ci-e2e-badger.yaml

+6-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ jobs:
2525
egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs
2626

2727
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
28-
2928
- uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0
3029
with:
3130
go-version: 1.23.x
@@ -41,6 +40,12 @@ jobs:
4140
;;
4241
esac
4342
43+
- uses: ./.github/actions/verify-metrics-snapshot
44+
if: matrix.version == 'v2'
45+
with:
46+
snapshot: metrics_snapshot_badger
47+
artifact_key: metrics_snapshot_badger_${{ matrix.version }}
48+
4449
- name: Upload coverage to codecov
4550
uses: ./.github/actions/upload-codecov
4651
with:

.github/workflows/ci-e2e-cassandra.yml

+8
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,17 @@ jobs:
4848
run: bash scripts/e2e/cassandra.sh ${{ matrix.version.major }} ${{ matrix.version.schema }} ${{ matrix.jaeger-version }}
4949
env:
5050
SKIP_APPLY_SCHEMA: ${{ matrix.create-schema == 'auto' && true || false }}
51+
52+
- uses: ./.github/actions/verify-metrics-snapshot
53+
if: matrix.jaeger-version == 'v2'
54+
with:
55+
snapshot: metrics_snapshot_cassandra
56+
artifact_key: metrics_snapshot_cassandras_${{ matrix.version.major }}_${{ matrix.version.schema }}_${{ matrix.jaeger-version }}_${{ matrix.create-schema }}
5157

5258
- name: Upload coverage to codecov
5359
uses: ./.github/actions/upload-codecov
5460
with:
5561
files: cover.out
5662
flags: cassandra-${{ matrix.version.major }}-${{ matrix.jaeger-version }}-${{ matrix.create-schema }}
63+
64+

.github/workflows/ci-e2e-elasticsearch.yml

+7-5
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,7 @@ jobs:
4040
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
4141
with:
4242
submodules: true
43-
44-
- name: Fetch git tags
45-
run: |
46-
git fetch --prune --unshallow --tags
47-
43+
4844
- uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0
4945
with:
5046
go-version: 1.23.x
@@ -58,6 +54,12 @@ jobs:
5854
- name: Run ${{ matrix.version.distribution }} integration tests
5955
id: test-execution
6056
run: bash scripts/e2e/elasticsearch.sh ${{ matrix.version.distribution }} ${{ matrix.version.major }} ${{ matrix.version.jaeger }}
57+
58+
- uses: ./.github/actions/verify-metrics-snapshot
59+
if: matrix.version.jaeger == 'v2'
60+
with:
61+
snapshot: metrics_snapshot_elasticsearch
62+
artifact_key: metrics_snapshot_elasticsearch_${{ matrix.version.major }}_${{ matrix.version.jaeger}}
6163

6264
- name: Upload coverage to codecov
6365
uses: ./.github/actions/upload-codecov

.github/workflows/ci-e2e-grpc.yml

+6
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,12 @@ jobs:
4141
;;
4242
esac
4343
44+
- uses: ./.github/actions/verify-metrics-snapshot
45+
if: matrix.version == 'v2'
46+
with:
47+
snapshot: metrics_snapshot_grpc
48+
artifact_key: metrics_snapshot_grpc_${{ matrix.version }}
49+
4450
- name: Upload coverage to codecov
4551
uses: ./.github/actions/upload-codecov
4652
with:

.github/workflows/ci-e2e-kafka.yml

+6
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,12 @@ jobs:
3535
- name: Run kafka integration tests
3636
id: test-execution
3737
run: bash scripts/e2e/kafka.sh -j ${{ matrix.jaeger-version }} -v ${{ matrix.kafka-version }}
38+
39+
- uses: ./.github/actions/verify-metrics-snapshot
40+
if: matrix.jaeger-version == 'v2'
41+
with:
42+
snapshot: metrics_snapshot_kafka
43+
artifact_key: metrics_snapshot_kafka_${{ matrix.jaeger-version }}
3844

3945
- name: Upload coverage to codecov
4046
uses: ./.github/actions/upload-codecov

.github/workflows/ci-e2e-memory.yaml

+7-2
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,20 @@ jobs:
2121
egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs
2222

2323
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
24-
24+
2525
- uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0
2626
with:
2727
go-version: 1.23.x
2828

2929
- name: Run Memory storage integration tests
3030
run: |
3131
STORAGE=memory_v2 make jaeger-v2-storage-integration-test
32-
32+
33+
- uses: ./.github/actions/verify-metrics-snapshot
34+
with:
35+
snapshot: metrics_snapshot_memory
36+
artifact_key: metrics_snapshot_memory
37+
3338
- name: Upload coverage to codecov
3439
uses: ./.github/actions/upload-codecov
3540
with:

.github/workflows/ci-e2e-opensearch.yml

+6-4
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,6 @@ jobs:
3838
with:
3939
submodules: true
4040

41-
- name: Fetch git tags
42-
run: |
43-
git fetch --prune --unshallow --tags
44-
4541
- uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0
4642
with:
4743
go-version: 1.23.x
@@ -52,6 +48,12 @@ jobs:
5248
id: test-execution
5349
run: bash scripts/e2e/elasticsearch.sh ${{ matrix.version.distribution }} ${{ matrix.version.major }} ${{ matrix.version.jaeger }}
5450

51+
- uses: ./.github/actions/verify-metrics-snapshot
52+
if: matrix.version.jaeger == 'v2'
53+
with:
54+
snapshot: metrics_snapshot_opensearch
55+
artifact_key: metrics_snapshot_opensearch_${{ matrix.version.major }}
56+
5557
- name: Upload coverage to codecov
5658
uses: ./.github/actions/upload-codecov
5759
with:

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,4 @@ sha256sum.combined.txt
5050
resource.syso
5151
.gocache
5252
test-results.json
53+
.metrics/

cmd/jaeger/internal/integration/e2e_integration.go

+21
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,32 @@ func (s *E2EStorageIntegration) e2eInitialize(t *testing.T, storage string) {
9999
require.NoError(t, err)
100100

101101
t.Cleanup(func() {
102+
scrapeMetrics(t, storage)
102103
require.NoError(t, s.TraceReader.(io.Closer).Close())
103104
require.NoError(t, s.TraceWriter.(io.Closer).Close())
104105
})
105106
}
106107

108+
func scrapeMetrics(t *testing.T, storage string) {
109+
req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, "http://localhost:8888/metrics", nil)
110+
require.NoError(t, err)
111+
112+
client := &http.Client{}
113+
resp, err := client.Do(req)
114+
require.NoError(t, err)
115+
defer resp.Body.Close()
116+
117+
outputDir := "../../../../.metrics"
118+
require.NoError(t, os.MkdirAll(outputDir, os.ModePerm))
119+
120+
metricsFile, err := os.Create(fmt.Sprintf("%s/metrics_snapshot_%v.txt", outputDir, storage))
121+
require.NoError(t, err)
122+
defer metricsFile.Close()
123+
124+
_, err = io.Copy(metricsFile, resp.Body)
125+
require.NoError(t, err)
126+
}
127+
107128
func createStorageCleanerConfig(t *testing.T, configFile string, storage string) string {
108129
data, err := os.ReadFile(configFile)
109130
require.NoError(t, err)

scripts/e2e/compare_metrics.py

+77
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
# Copyright (c) 2024 The Jaeger Authors.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
import argparse
5+
from difflib import unified_diff
6+
from bisect import insort
7+
from prometheus_client.parser import text_string_to_metric_families
8+
9+
def read_metric_file(file_path):
    """Return the lines of the metrics file at ``file_path``.

    The file is decoded as UTF-8 explicitly (the encoding of the Prometheus
    text format) so the result does not depend on the machine's locale.
    Each returned line keeps its trailing newline.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        return f.readlines()
12+
13+
def parse_metrics(content):
    """Turn Prometheus text-format metrics into a sorted list of series names.

    Every sample is rendered as ``<family name>{<label>="<value>",...}`` with
    its labels ordered by label name. Sample values are not part of the
    rendered string, and the ``service_instance_id`` label is dropped.
    """
    series = []
    for family in text_string_to_metric_families(content):
        for sample in family.samples:
            # simply drop undesirable metric labels
            kept = {
                name: value
                for name, value in dict(sample.labels).items()
                if name != 'service_instance_id'
            }
            rendered = ','.join(f'{k}="{v}"' for k, v in sorted(kept.items()))
            series.append(f"{family.name}{{{rendered}}}")

    # One sort at the end yields the same ordering as inserting in sorted order.
    series.sort()
    return series
26+
27+
28+
def generate_diff(file1_content, file2_content):
    """Return a unified diff of the metric series found in two metric dumps.

    Each argument may be a single string or a list of lines; lists are
    concatenated before parsing. The diff has no context lines and is
    returned as one newline-joined string, which is empty when both inputs
    yield the same series.
    """
    parsed = []
    for content in (file1_content, file2_content):
        text = ''.join(content) if isinstance(content, list) else content
        parsed.append(parse_metrics(text))

    first, second = parsed
    return '\n'.join(unified_diff(first, second, lineterm='', n=0))
40+
41+
def write_diff_file(diff_lines, output_path):
    """Write the diff text to ``output_path`` and report the location on stdout.

    ``diff_lines`` is the diff as a single string; a final newline is
    appended after it.
    """
    with open(output_path, 'w') as out:
        # The second element adds the final newline.
        out.writelines((diff_lines, '\n'))
        print(f"Diff file successfully written to: {output_path}")
47+
48+
def main():
    """Compare two metric files given on the command line.

    Reads ``--file1`` and ``--file2`` (both required), diffs the metric
    series they contain and, when they differ, prints the diff and writes it
    to ``--output``.

    Returns:
        1 if differences were found, 0 otherwise.
    """
    parser = argparse.ArgumentParser(description='Generate diff between two Jaeger metric files')
    # Both inputs are mandatory: without them there is nothing to compare, and
    # argparse reports a usage error instead of a later failure inside open().
    parser.add_argument('--file1', required=True, help='Path to first metric file')
    parser.add_argument('--file2', required=True, help='Path to second metric file')
    parser.add_argument('--output', '-o', default='metrics_diff.txt',
                        help='Output diff file path (default: metrics_diff.txt)')

    args = parser.parse_args()

    # Read input files
    file1_lines = read_metric_file(args.file1)
    file2_lines = read_metric_file(args.file2)

    # Generate diff
    diff_lines = generate_diff(file1_lines, file2_lines)

    # Check if there are any differences
    if diff_lines:
        print("differences found between the metric files.")
        print("=== Metrics Comparison Results ===")
        print(diff_lines)
        write_diff_file(diff_lines, args.output)

        return 1

    print("no difference found")
    return 0
75+
76+
if __name__ == '__main__':
    # main() returns 1 when the snapshots differ; hand that value to the
    # interpreter as the exit status so callers checking `$?` can see it.
    raise SystemExit(main())

0 commit comments

Comments
 (0)