Skip to content

Commit

Permalink
Improve master_commit_red query performance (#6174)
Browse files Browse the repository at this point in the history
Pre filter the commits so we can filter the workflow job and workflow
run tables on it later

This improves speed for all time ranges up to 1 year. I did not check
beyond that
I believe the memory used is about the same, but it scans more rows for
some reason

This is the query behind this chart
<img width="1266" alt="image"
src="https://github.com/user-attachments/assets/e00c18dc-a87a-4648-99cd-9c35295baabf"
/>
on the metrics page

Tentative wins, sample size 3
```
+------+----------+-----------+-------------+---------------+--------------+---------------+----------------+--------------+
| Test | Avg Time | Base Time | Time Change | % Time Change |   Avg Mem    |    Base Mem   |   Mem Change   | % Mem Change |
+------+----------+-----------+-------------+---------------+--------------+---------------+----------------+--------------+
|  0   |   1163   |   23117   |    -21954   |      -95      | 157241294.6  | 1539051189.75 | -1381809895.15 |     -90      |
|  1   |   1281   |   14509   |    -13228   |      -91      |  350569557   |  1585082143.8 | -1234512586.8  |     -78      |
|  2   |   8704   |   22954   |    -14250   |      -62      | 1774302065.6 |   3929546293  | -2155244227.4  |     -55      |
+------+----------+-----------+-------------+---------------+--------------+---------------+----------------+--------------+
```
  • Loading branch information
clee2000 authored Jan 22, 2025
1 parent 5c23404 commit f3c27c3
Showing 1 changed file with 38 additions and 19 deletions.
57 changes: 38 additions & 19 deletions torchci/clickhouse_queries/master_commit_red/query.sql
Original file line number Diff line number Diff line change
@@ -1,38 +1,57 @@
--- This query is used to show the histogram of trunk red commits on HUD metrics page
--- during a period of time
WITH all_jobs AS (
-- Split up the query into multiple CTEs to make it faster.
with commits as (
select
push.head_commit.'timestamp' as time,
push.head_commit.'id' as sha
from
-- Not using final since push table doesn't really get updated
push
where
push.ref in ('refs/heads/master', 'refs/heads/main')
and push.repository.'owner'.'name' = 'pytorch'
and push.repository.'name' = 'pytorch'
and push.head_commit.'timestamp' >= {startTime: DateTime64(3)}
and push.head_commit.'timestamp' < {stopTime: DateTime64(3)}
),
all_runs as (
select
workflow_run.id as id,
workflow_run.head_commit.'id' as sha,
workflow_run.name as name,
commit.time as time
from
workflow_run final
join commits commit on workflow_run.head_commit.'id' = commit.sha
where
(
-- Limit it to workflows which block viable/strict upgrades
workflow_run.name in ('Lint', 'pull', 'trunk')
OR workflow_run.name like 'linux-binary%'
)
AND workflow_run.event != 'workflow_run' -- Filter out workflow_run-triggered jobs, which have nothing to do with the SHA
and workflow_run.id in (select id from materialized_views.workflow_run_by_head_sha where head_sha in (select sha from commits))
),
all_jobs AS (
SELECT
push.head_commit.'timestamp' AS time,
all_runs.time AS time,
CASE
WHEN job.conclusion = 'failure' THEN 'red'
WHEN job.conclusion = 'timed_out' THEN 'red'
WHEN job.conclusion = 'cancelled' THEN 'red'
WHEN job.conclusion = '' THEN 'pending'
ELSE 'green'
END as conclusion,
push.head_commit.'id' AS sha
all_runs.sha AS sha
FROM
workflow_job job FINAL
JOIN workflow_run FINAL ON workflow_run.id = workflow_job.run_id
JOIN push FINAL ON workflow_run.head_commit.'id' = push.head_commit.'id'
default.workflow_job job final join all_runs all_runs on all_runs.id = workflow_job.run_id
WHERE
job.name != 'ciflow_should_run'
AND job.name != 'generate-test-matrix'
AND (
-- Limit it to workflows which block viable/strict upgrades
workflow_run.name in ('Lint', 'pull', 'trunk')
OR workflow_run.name like 'linux-binary%'
)
AND job.name NOT LIKE '%rerun_disabled_tests%'
AND job.name NOT LIKE '%unstable%'
AND workflow_run.event != 'workflow_run' -- Filter out workflow_run-triggered jobs, which have nothing to do with the SHA
AND push.ref IN (
'refs/heads/master', 'refs/heads/main'
)
AND push.repository.'owner'.'name' = 'pytorch'
AND push.repository.'name' = 'pytorch'
AND push.head_commit.'timestamp' >= {startTime: DateTime64(3)}
AND push.head_commit.'timestamp' < {stopTime: DateTime64(3)}
and job.id in (select id from materialized_views.workflow_job_by_head_sha where head_sha in (select sha from commits))
),
commit_overall_conclusion AS (
SELECT
Expand Down

0 comments on commit f3c27c3

Please sign in to comment.