Skip to content

Commit

Permalink
Migrate lf rollover percentage query to CH (#5847)
Browse files Browse the repository at this point in the history
Removed references to the obsolete amz2023 runner prefixes

Validation: Ensured that both the CH and Rockset versions of the
query return the same data and that the charts look the same
  • Loading branch information
ZainRizvi authored Nov 1, 2024
1 parent d137d3a commit aab94e1
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 9 deletions.
3 changes: 1 addition & 2 deletions torchci/clickhouse_queries/lf_rollover_health/params.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
{
"days_ago": "Int64",
"granularity": "String"
"days_ago": "Int64"
}
3 changes: 3 additions & 0 deletions torchci/clickhouse_queries/lf_rollover_percentage/params.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"days_ago": "Int64"
}
90 changes: 90 additions & 0 deletions torchci/clickhouse_queries/lf_rollover_percentage/query.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
WITH
-- One row per (completed job, runner label) pair created within the last
-- `days_ago` days, bucketed by the hour in which the job started.
normalized_jobs AS (
    SELECT
        lbl AS label,
        -- Keep only the text before the first comma, which removes the shard
        -- number and label from job names.
        extract(j.name, '[^,]*') AS job_name,
        j.workflow_name,
        toStartOfInterval(j.started_at, INTERVAL 1 HOUR) AS bucket
    FROM
        -- FINAL is deliberately NOT applied to workflow_job.
        -- Risks of leaving it out:
        --   - Rows that were updated may show up twice (their before/after
        --     states). That is acceptable as long as the query compensates,
        --     which it does by requiring j.status = 'completed'.
        --   - In the worst case only the 'old' version of some rows is seen.
        -- Costs of adding it:
        --   - Query processing time increases from ~5 -> 16 seconds.
        --   - Memory usage grows from 7.5 GB -> 32 GB.
        -- So skipping FINAL is the better tradeoff for this query.
        workflow_job AS j
    ARRAY JOIN j.labels AS lbl
    WHERE
        j.status = 'completed'
        AND j.created_at > now() - INTERVAL {days_ago: Int64} DAY
        -- Drop the generic self-hosted label and the canary runner labels.
        AND NOT (
            lbl = 'self-hosted'
            OR lbl LIKE 'lf.c.%'
            OR lbl LIKE '%canary%'
        )
),
-- Distinct names of jobs that ran at least once on a Linux Foundation runner,
-- i.e. a runner whose label starts with `lf.`.
lf_jobs AS (
    SELECT DISTINCT
        j.job_name
    FROM
        normalized_jobs AS j
    WHERE
        -- Match the full `lf.` prefix rather than a bare `lf` so that this
        -- filter agrees with the lf_fleet classification in success_stats
        -- (substring(label, 1, 3) = 'lf.'). A label that merely begins with
        -- the letters "lf" is not treated as an LF runner there.
        j.label LIKE 'lf.%'
),
-- Keep only the rows whose job name has run on the LF fleet at least once.
-- Whether the job also ran on the Meta fleet is checked later, when the two
-- fleets are paired up in comparison_stats.
comparable_jobs AS (
    SELECT
        nj.bucket,
        nj.label,
        nj.job_name,
        nj.workflow_name
    FROM
        normalized_jobs AS nj
    WHERE
        -- lf_jobs holds distinct names, so this membership test keeps exactly
        -- the rows an inner join on job_name would keep.
        nj.job_name IN (SELECT lfj.job_name FROM lf_jobs AS lfj)
),
-- Number of job rows per hour bucket, job, workflow and runner label, with
-- each label tagged by the fleet it belongs to.
success_stats AS (
    SELECT
        bucket,
        count() AS group_size,
        job_name,
        workflow_name,
        label,
        -- A label starting with `lf.` marks a Linux Foundation runner;
        -- everything else is counted as the Meta fleet.
        CASE
            WHEN substring(label, 1, 3) = 'lf.' THEN True
            ELSE False
        END AS lf_fleet
    FROM
        comparable_jobs
    GROUP BY
        bucket,
        job_name,
        workflow_name,
        label
),
-- For every hour bucket, pair each fleet's job counts ("counted") with the
-- other fleet's counts ("complement") for the same job and workflow, then
-- compute the share of those jobs that ran on the counted fleet.
--
-- Each matching pair is produced twice, once in each direction, so the result
-- has one row per (bucket, fleet).
--
-- Output columns (names kept as-is because consumers read them by name):
--   total_jobs      - counted + complement job counts
--   compliment_jobs - job count of the opposite fleet
--   counted_jobs    - job count of the fleet this row describes
--   c_fleet         - lf_fleet flag of the complement side
--   m_fleet         - lf_fleet flag of the counted side
--   percentage      - counted_jobs / total_jobs * 100
--   fleet           - display name of the counted fleet
comparison_stats AS (
    SELECT
        counted.bucket,
        SUM(counted.group_size + complement.group_size) AS total_jobs,
        SUM(complement.group_size) AS compliment_jobs,
        SUM(counted.group_size) AS counted_jobs,
        complement.lf_fleet AS c_fleet,
        counted.lf_fleet AS m_fleet,
        CAST(SUM(counted.group_size) AS Float32)
            / SUM(counted.group_size + complement.group_size) * 100 AS percentage,
        IF(counted.lf_fleet, 'Linux Foundation', 'Meta') AS fleet
    FROM
        success_stats AS counted
    INNER JOIN
        success_stats AS complement
        -- All three equality keys belong in ON: joining on bucket alone and
        -- filtering afterwards builds a far larger intermediate result.
        ON counted.bucket = complement.bucket
        AND counted.job_name = complement.job_name
        AND counted.workflow_name = complement.workflow_name
    WHERE
        -- Only pair rows from opposite fleets.
        counted.lf_fleet != complement.lf_fleet
        -- Ignore tiny groups on either side; they make the percentage noisy.
        AND counted.group_size > 3
        AND complement.group_size > 3
    GROUP BY
        counted.bucket, counted.lf_fleet, complement.lf_fleet
)
-- Emit every comparison column, newest bucket first; within a bucket the
-- rows are ordered by fleet display name.
SELECT
    bucket,
    total_jobs,
    compliment_jobs,
    counted_jobs,
    c_fleet,
    m_fleet,
    percentage,
    fleet
FROM
    comparison_stats
ORDER BY
    bucket DESC,
    fleet ASC
19 changes: 12 additions & 7 deletions torchci/pages/metrics.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -1230,18 +1230,23 @@ export default function Page() {
title={"Percentage of jobs rolled over to Linux Foundation"}
queryName={"lf_rollover_percentage"}
queryCollection={"metrics"}
queryParams={[
{
name: "days_ago",
type: "int",
value: timeRange,
},
]}
queryParams={
useClickHouse
? { ...timeParamsClickHouse, days_ago: timeRange }
: [
{
name: "days_ago",
type: "int",
value: timeRange,
},
]
}
granularity={"hour"}
timeFieldName={"bucket"}
yAxisFieldName={"percentage"}
groupByFieldName={"fleet"}
yAxisRenderer={(value) => value.toFixed(2).toString() + "%"}
useClickHouse={useClickHouse}
/>
</Grid>
</Grid>
Expand Down

0 comments on commit aab94e1

Please sign in to comment.