-- Hourly split of job volume between the Linux Foundation (LF) fleet and the
-- Meta fleet, restricted to jobs that were seen on LF runners.
--
-- Parameters:
--   days_ago (Int64): look-back window in days, applied to workflow_job.created_at.
--
-- Output (ordered by bucket DESC, fleet): for each hourly bucket, up to one row
-- per fleet with
--   total_jobs      - jobs counted on both fleets for the matched job groups
--   compliment_jobs - jobs on the opposite fleet (name kept as-is to preserve
--                     the output schema)
--   counted_jobs    - jobs on the fleet named in `fleet`
--   c_fleet         - lf_fleet flag of the opposite side
--   m_fleet         - lf_fleet flag of the counted side
--   percentage      - counted_jobs / total_jobs * 100
--   fleet           - 'Linux Foundation' or 'Meta'
WITH
-- One row per (completed job, runner label) pair, bucketed by the hour the
-- job started.
normalized_jobs AS (
    SELECT
        l AS label,
        -- Keep only the text before the first comma, which removes the shard
        -- number and label from job names.
        extract(j.name, '[^,]*') AS job_name,
        j.workflow_name,
        toStartOfInterval(j.started_at, INTERVAL 1 HOUR) AS bucket
    FROM
        -- Deliberately not adding FINAL to this workflow_job.
        -- Risks of not using it:
        --   - You may get duplicate records for rows that were updated, corresponding
        --     to their before/after states, but as long as there's some mechanism in
        --     the query to account for that it's okay (we check for
        --     j.status = 'completed').
        --   - In the worst case scenario, you may only see the 'old' version of the
        --     records for some rows.
        -- Costs of using it:
        --   - Query processing time increases from ~5 -> 16 seconds
        --   - Memory usage grows from 7.5 GB -> 32 GB
        -- So the tradeoff is worth it for this query.
        workflow_job AS j
    ARRAY JOIN j.labels AS l
    WHERE
        j.created_at > now() - INTERVAL {days_ago: Int64} DAY
        AND j.status = 'completed'
        AND l != 'self-hosted'
        AND l NOT LIKE 'lf.c.%'
        AND l NOT LIKE '%canary%'
),

-- Names of jobs that ran at least once with a label starting with 'lf'.
-- NOTE(review): this matches the prefix 'lf' while success_stats.lf_fleet checks
-- for 'lf.' -- confirm the difference is intentional.
lf_jobs AS (
    SELECT DISTINCT j.job_name
    FROM
        normalized_jobs AS j
    WHERE
        j.label LIKE 'lf%'
),

-- Filter jobs down to the ones that ran in both
-- LF and Meta fleets.
comparable_jobs AS (
    SELECT
        j.bucket,
        j.label,
        j.job_name,
        j.workflow_name
    FROM
        normalized_jobs AS j
    INNER JOIN
        lf_jobs AS lfj ON j.job_name = lfj.job_name
),

-- Job count per (bucket, job, workflow, label), flagged by fleet.
success_stats AS (
    SELECT
        bucket,
        count(*) AS group_size,
        job_name,
        workflow_name,
        label,
        -- Labels beginning with 'lf.' mark the LF fleet.
        if(substring(label, 1, 3) = 'lf.', True, False) AS lf_fleet
    FROM
        comparable_jobs
    GROUP BY
        bucket, job_name, workflow_name, label
),

-- Self-join that pairs each group with the same job/workflow/bucket on the
-- opposite fleet. The join is symmetric, so despite the alias names `lf` is the
-- "counted" side and `m` the opposite side; every pair appears once with `lf`
-- on the LF fleet and once with `lf` on the Meta fleet.
comparison_stats AS (
    SELECT
        lf.bucket,
        SUM(lf.group_size + m.group_size) AS total_jobs,
        SUM(m.group_size) AS compliment_jobs,
        SUM(lf.group_size) AS counted_jobs,
        m.lf_fleet AS c_fleet,
        lf.lf_fleet AS m_fleet,
        CAST(SUM(lf.group_size) AS Float32) / SUM(lf.group_size + m.group_size) * 100 AS percentage,
        if(lf.lf_fleet, 'Linux Foundation', 'Meta') AS fleet
    FROM
        success_stats AS lf
    INNER JOIN
        success_stats AS m
        ON lf.bucket = m.bucket
        AND lf.job_name = m.job_name
        AND lf.workflow_name = m.workflow_name
    WHERE
        -- Keep only pairs whose two sides are on different fleets.
        (
            (lf.lf_fleet = 1 AND m.lf_fleet = 0)
            OR (lf.lf_fleet = 0 AND m.lf_fleet = 1)
        )
        -- Ignore groups with 3 or fewer jobs on either side.
        AND lf.group_size > 3
        AND m.group_size > 3
    GROUP BY
        lf.bucket, lf.lf_fleet, m.lf_fleet
)

SELECT
    bucket,
    total_jobs,
    compliment_jobs,
    counted_jobs,
    c_fleet,
    m_fleet,
    percentage,
    fleet
FROM comparison_stats
ORDER BY bucket DESC, fleet