From e0d02f197128adae36f4982e6c997fcf3bf74b64 Mon Sep 17 00:00:00 2001 From: Zain Rizvi Date: Wed, 20 Nov 2024 13:39:44 -0600 Subject: [PATCH] [BE] [Perf optimization] Speed up queued_jobs query (#5945) Optimizes the query to use just 10% of the resources compared to before by: - Adding a max time range to limit results to just the relevant window - Moving the time range filter into the earlier possible_queued_jobs subquery. Not sure why it makes such a big difference, but it does. Maybe due to the lack of FINAL there? Adding in FINAL tanks the performance. Perf change: - Memory used: 9.7 GB -> 0.8 GB - Elapsed time: 3.5 s -> 0.4 s - Rows read: 12 million -> 8 million --- torchci/clickhouse_queries/queued_jobs/query.sql | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/torchci/clickhouse_queries/queued_jobs/query.sql b/torchci/clickhouse_queries/queued_jobs/query.sql index 66eb40e6a1..493222a58c 100644 --- a/torchci/clickhouse_queries/queued_jobs/query.sql +++ b/torchci/clickhouse_queries/queued_jobs/query.sql @@ -1,6 +1,10 @@ --- This query is used by HUD metrics page to get the list of queued jobs with possible_queued_jobs as ( - select id, run_id from default.workflow_job where status = 'queued' + select id, run_id + from default.workflow_job -- FINAL not needed since we just use this to filter a table that has already been FINALed + where status = 'queued' + AND created_at < (CURRENT_TIMESTAMP() - INTERVAL 5 MINUTE) + AND created_at > (CURRENT_TIMESTAMP() - INTERVAL 1 WEEK) ) SELECT DATE_DIFF( @@ -27,9 +31,6 @@ WHERE and workflow.id in (select run_id from possible_queued_jobs) and workflow.repository.'full_name' = 'pytorch/pytorch' AND job.status = 'queued' - AND job.created_at < ( - CURRENT_TIMESTAMP() - INTERVAL 5 MINUTE - ) /* These two conditions are workarounds for GitHub's broken API. Sometimes */ /* jobs get stuck in a permanently "queued" state but definitely ran. We can */ /* detect this by looking at whether any steps executed (if there were, */