From d730c55ee6efbbfbfa0d9c868f247f72891dbffd Mon Sep 17 00:00:00 2001
From: Catherine Lee
Date: Fri, 13 Sep 2024 16:12:02 -0700
Subject: [PATCH] tc

---
 .github/workflows/update-test-times.yml | 12 ++++-
 tools/torchci/clickhouse.py             | 49 +++++++++++++++++++
 tools/torchci/requirements.txt          |  1 +
 tools/torchci/update_test_times.py      | 49 ++++++-------
 .../test_time_per_class/query.sql       | 31 ++++++------
 .../query.sql                           | 48 ++++++++++--------
 .../test_time_per_file/query.sql        | 31 ++++++------
 .../query.sql                           | 46 ++++++++++-------
 8 files changed, 164 insertions(+), 103 deletions(-)
 create mode 100644 tools/torchci/clickhouse.py

diff --git a/.github/workflows/update-test-times.yml b/.github/workflows/update-test-times.yml
index 52a092589f..69fab5ff50 100644
--- a/.github/workflows/update-test-times.yml
+++ b/.github/workflows/update-test-times.yml
@@ -23,14 +23,22 @@ jobs:
     steps:
       - uses: actions/checkout@v3
 
+      - name: Set up python 3.10
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.10'
+
       - name: Install Dependencies
-        run: pip3 install boto3==1.19.12 rockset==1.0.3
+        run: python -m pip install boto3==1.19.12 clickhouse-connect==0.7.16 requests==2.26.0
 
       - name: Update test times
         run: |
-          python3 -m torchci.update_test_times
+          python -m torchci.update_test_times
         env:
           ROCKSET_API_KEY: ${{ secrets.ROCKSET_API_KEY }}
+          CLICKHOUSE_ENDPOINT: ${{ secrets.CLICKHOUSE_HUD_USER_URL }}
+          CLICKHOUSE_USERNAME: ${{ secrets.CLICKHOUSE_HUD_USER_USERNAME }}
+          CLICKHOUSE_PASSWORD: ${{ secrets.CLICKHOUSE_HUD_USER_PASSWORD }}
 
       - name: Push test file times file to this repository
         if: github.event_name != 'pull_request'
diff --git a/tools/torchci/clickhouse.py b/tools/torchci/clickhouse.py
new file mode 100644
index 0000000000..c55b86dd93
--- /dev/null
+++ b/tools/torchci/clickhouse.py
@@ -0,0 +1,49 @@
+import json
+import os
+from functools import lru_cache
+from typing import Any, Dict
+
+import clickhouse_connect
+from torchci.utils import REPO_ROOT
+
+
+@lru_cache(maxsize=1)
+def get_clickhouse_client() -> Any:
+    endpoint = os.environ["CLICKHOUSE_ENDPOINT"]
+    # The endpoint secret may include the scheme and port; clickhouse_connect
+    # wants a bare hostname here (the lambda handles the full URL fine).
+    if endpoint.startswith("https://"):
+        endpoint = endpoint[len("https://") :]
+    if endpoint.endswith(":8443"):
+        endpoint = endpoint[: -len(":8443")]
+    return clickhouse_connect.get_client(
+        host=endpoint,
+        user=os.environ["CLICKHOUSE_USERNAME"],
+        password=os.environ["CLICKHOUSE_PASSWORD"],
+        secure=True,
+        interface="https",
+        port=8443,
+    )
+
+
+def query_clickhouse_saved(queryName: str, inputParams: Dict[str, Any]) -> Any:
+    path = REPO_ROOT / "torchci" / "clickhouse_queries" / queryName
+    with open(path / "query.sql") as f:
+        queryText = f.read()
+    with open(path / "params.json") as f:
+        paramsText = json.load(f)
+
+    queryParams = {name: inputParams[name] for name in paramsText}
+    return query_clickhouse(queryText, queryParams)
+
+
+def query_clickhouse(query: str, params: Dict[str, Any]) -> Any:
+    res = get_clickhouse_client().query(query, params)
+    json_res = []
+    # Convert the result rows into a list of dicts keyed by column name
+    for row in res.result_rows:
+        json_row = {}
+        for i, column in enumerate(res.column_names):
+            json_row[column] = row[i]
+        json_res.append(json_row)
+    return json_res
diff --git a/tools/torchci/requirements.txt b/tools/torchci/requirements.txt
index a4779f619d..a7eda82b15 100644
--- a/tools/torchci/requirements.txt
+++ b/tools/torchci/requirements.txt
@@ -3,3 +3,4 @@ requests
 rockset==1.0.3
 boto3==1.19.12
 pytest==7.2.0
+clickhouse-connect==0.7.16
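Usage sketch for the new helper, assuming the three CLICKHOUSE_* variables above are exported and tools/ is on PYTHONPATH; test_time_per_file is one of the saved queries converted below, and it takes no parameters:

    from torchci.clickhouse import query_clickhouse_saved

    # Rows come back as dicts keyed by the SELECT aliases in query.sql.
    rows = query_clickhouse_saved("test_time_per_file", {})
    for row in rows[:5]:
        print(row["base_name"], row["test_config"], row["file"], row["time"])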
diff --git a/tools/torchci/update_test_times.py b/tools/torchci/update_test_times.py
index 4958b4daec..4af8a6ab1d 100644
--- a/tools/torchci/update_test_times.py
+++ b/tools/torchci/update_test_times.py
@@ -1,14 +1,9 @@
 import json
-import os
 from collections import defaultdict
-from pathlib import Path
 
 import requests
-import rockset
+from torchci.clickhouse import query_clickhouse_saved
 
-REPO_ROOT = Path(__file__).resolve().parent.parent.parent
-
-PROD_VERSIONS_FILE = REPO_ROOT / "torchci" / "rockset" / "prodVersions.json"
 
 TEST_TIMES_URL = "https://raw.githubusercontent.com/pytorch/test-infra/generated-stats/stats/test-times.json"
 TEST_CLASS_TIMES_URL = "https://raw.githubusercontent.com/pytorch/test-infra/generated-stats/stats/test-class-times.json"
@@ -18,15 +13,15 @@
 TEST_TIME_PER_CLASS_PERIODIC_JOBS_QUERY_NAME = "test_time_per_class_periodic_jobs"
 
 
-def get_file_data_from_rockset():
-    return get_data_from_rockset(file_mode=True)
+def get_file_data_from_clickhouse():
+    return get_data_from_clickhouse(file_mode=True)
 
 
-def get_class_data_from_rockset():
-    return get_data_from_rockset(file_mode=False)
+def get_class_data_from_clickhouse():
+    return get_data_from_clickhouse(file_mode=False)
 
 
-def get_data_from_rockset(file_mode: bool):
+def get_data_from_clickhouse(file_mode: bool):
     general_query_name = (
         TEST_TIME_PER_FILE_QUERY_NAME if file_mode else TEST_TIME_PER_CLASS_QUERY_NAME
     )
@@ -36,23 +31,9 @@ def get_data_from_rockset(file_mode: bool):
         else TEST_TIME_PER_CLASS_PERIODIC_JOBS_QUERY_NAME
     )
 
-    rockset_client = rockset.RocksetClient(
-        host="api.usw2a1.rockset.com", api_key=os.environ["ROCKSET_API_KEY"]
-    )
-    with open(PROD_VERSIONS_FILE) as f:
-        prod_versions = json.load(f)
-
-    rockset_result = rockset_client.QueryLambdas.execute_query_lambda(
-        query_lambda=general_query_name,
-        version=prod_versions["commons"][general_query_name],
-        workspace="commons",
-    ).results
-    periodic_rockset_result = rockset_client.QueryLambdas.execute_query_lambda(
-        query_lambda=periodic_query_name,
-        version=prod_versions["commons"][periodic_query_name],
-        workspace="commons",
-    ).results
-    return rockset_result + periodic_rockset_result
+    clickhouse_result = query_clickhouse_saved(general_query_name, {})
+    periodic_clickhouse_result = query_clickhouse_saved(periodic_query_name, {})
+    return clickhouse_result + periodic_clickhouse_result
 
 
 def download_old_test_file_times():
@@ -95,7 +76,7 @@ def convert_test_class_times_to_default_dict(d):
     return new_d
 
 
-def gen_test_file_times(rockset_results, old_test_times):
+def gen_test_file_times(clickhouse_results, old_test_times):
     # Use old test times because sometimes we want to manually edit the test
     # times json and want those changes to persist. Unfortunately this means
     # that the test times json grows and never shrinks, but we can edit the json
@@ -103,7 +84,7 @@
     test_times = convert_test_file_times_to_default_dict(old_test_times)
     test_times_no_build_env = defaultdict(lambda: defaultdict(list))
     test_times_no_test_config = defaultdict(list)
-    for row in rockset_results:
+    for row in clickhouse_results:
         test_times[row["base_name"]][row["test_config"]][row["file"]] = row["time"]
         test_times_no_build_env[row["test_config"]][row["file"]].append(row["time"])
         test_times_no_test_config[row["file"]].append(row["time"])
@@ -123,7 +104,7 @@ def gen_test_file_times(rockset_results, old_test_times):
     return test_times
 
 
-def gen_test_class_times(rockset_results, old_test_times):
+def gen_test_class_times(clickhouse_results, old_test_times):
     # Use old test times because sometimes we want to manually edit the test
     # times json and want those changes to persist. Unfortunately this means
     # that the test times json grows and never shrinks, but we can edit the json
@@ -134,7 +115,7 @@ def gen_test_class_times(rockset_results, old_test_times):
         lambda: defaultdict(lambda: defaultdict(list))
     )
     test_times_no_test_config = defaultdict(lambda: defaultdict(list))
-    for row in rockset_results:
+    for row in clickhouse_results:
         test_times[row["base_name"]][row["test_config"]][row["file"]][
             row["classname"]
         ] = row["time"]
@@ -164,14 +145,14 @@ def gen_test_class_times(rockset_results, old_test_times):
 
 def main() -> None:
     test_file_times = gen_test_file_times(
-        get_file_data_from_rockset(), download_old_test_file_times()
+        get_file_data_from_clickhouse(), download_old_test_file_times()
     )
 
     with open("test-times.json", "w") as f:
         f.write(json.dumps(test_file_times, indent=2, sort_keys=True))
 
     test_class_times = gen_test_class_times(
-        get_class_data_from_rockset(), download_old_test_class_times()
+        get_class_data_from_clickhouse(), download_old_test_class_times()
     )
 
     with open("test-class-times.json", "w") as f:
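The overlay logic in gen_test_file_times is easiest to see on a toy example. A minimal sketch with made-up data, mirroring the nested-dict shape the code above builds:

    from collections import defaultdict

    # Previously published times, as downloaded from TEST_TIMES_URL.
    old = {"linux-jammy-py3.10": {"default": {"test_nn": 100.0}}}

    # Seed the nested dict with the old data ...
    test_times = defaultdict(lambda: defaultdict(dict))
    for base_name, configs in old.items():
        for config, files in configs.items():
            test_times[base_name][config].update(files)

    # ... then let a fresh ClickHouse row overwrite the stale entry.
    row = {"base_name": "linux-jammy-py3.10", "test_config": "default",
           "file": "test_nn", "time": 95.0}
    test_times[row["base_name"]][row["test_config"]][row["file"]] = row["time"]

    assert test_times["linux-jammy-py3.10"]["default"]["test_nn"] == 95.0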
diff --git a/torchci/clickhouse_queries/test_time_per_class/query.sql b/torchci/clickhouse_queries/test_time_per_class/query.sql
index 5d88e0f9b6..aab54f585e 100644
--- a/torchci/clickhouse_queries/test_time_per_class/query.sql
+++ b/torchci/clickhouse_queries/test_time_per_class/query.sql
@@ -1,31 +1,34 @@
--- !!! Query is not converted to CH syntax yet. Delete this line when it gets converted
 WITH most_recent_strict_commits AS (
     SELECT
-        push.head_commit.id as sha,
+        push.head_commit.id as sha
     FROM
-        commons.push
+        default.push final
     WHERE
         push.ref = 'refs/heads/viable/strict'
         AND push.repository.full_name = 'pytorch/pytorch'
     ORDER BY
-        push._event_time DESC
+        push.head_commit.timestamp DESC
     LIMIT
         3
 ), workflow AS (
     SELECT
         id
     FROM
-        commons.workflow_run w
-        INNER JOIN most_recent_strict_commits c on w.head_sha = c.sha
+        materialized_views.workflow_run_by_head_sha w
+    where head_sha in (select sha from most_recent_strict_commits)
 ), job AS (
     SELECT
         j.name,
         j.id,
-        j.run_id
+        j.run_id
     FROM
-        commons.workflow_job j
-        INNER JOIN workflow w on w.id = j.run_id
+        default.workflow_job j final
+    where j.id in (
+        select id from materialized_views.workflow_job_by_head_sha
+        where head_sha in (select sha from most_recent_strict_commits)
+    )
+    and j.run_id in (select id from workflow)
 ), class_duration_per_job AS (
     SELECT
@@ -33,15 +36,15 @@ class_duration_per_job AS (
         test_run.invoking_file as file,
         test_run.classname as classname,
         SUM(time) as time,
         REGEXP_EXTRACT(job.name, '^(.*) /', 1) as base_name,
-        REGEXP_EXTRACT(job.name, '/ test \(([\w-]*),', 1) as test_config,
+        REGEXP_EXTRACT(job.name, '/ test \(([\w-]*),', 1) as test_config
     FROM
-        commons.test_run_summary test_run
-        /* `test_run` is ginormous and `job` is small, so lookup join is essential */
-        INNER JOIN job ON test_run.job_id = job.id HINT(join_strategy = lookup)
+        default.test_run_summary test_run
+        INNER JOIN job ON test_run.job_id = job.id
     WHERE
         /* cpp tests do not populate `file` for some reason. */
         /* Exclude them as we don't include them in our slow test infra */
-        test_run.file IS NOT NULL
+        test_run.file != ''
+        and test_run.workflow_id in (select id from workflow)
     GROUP BY
         test_run.invoking_file,
         test_run.classname,
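The two REGEXP_EXTRACT calls split a CI job name into a build environment and a test config. The same patterns in Python, on a representative (made-up) job name:

    import re

    job_name = "linux-jammy-py3.10 / test (default, 1, 3, linux.2xlarge)"

    # Group 1 of '^(.*) /': everything before the " / test (...)" suffix.
    base_name = re.search(r"^(.*) /", job_name).group(1)
    # Group 1 of '/ test \(([\w-]*),': the first entry inside the parens.
    test_config = re.search(r"/ test \(([\w-]*),", job_name).group(1)

    assert (base_name, test_config) == ("linux-jammy-py3.10", "default")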
diff --git a/torchci/clickhouse_queries/test_time_per_class_periodic_jobs/query.sql b/torchci/clickhouse_queries/test_time_per_class_periodic_jobs/query.sql
index f462cea223..559b35191d 100644
--- a/torchci/clickhouse_queries/test_time_per_class_periodic_jobs/query.sql
+++ b/torchci/clickhouse_queries/test_time_per_class_periodic_jobs/query.sql
@@ -1,43 +1,51 @@
--- !!! Query is not converted to CH syntax yet. Delete this line when it gets converted
 -- same as test_time_per_class query except for the first select
 WITH good_periodic_sha AS (
     select
         job.head_sha as sha
     from
-        commons.workflow_job job
-        JOIN commons.workflow_run workflow on workflow.id = job.run_id
-        JOIN push on workflow.head_commit.id = push.head_commit.id
+        default.workflow_job job final
+        JOIN default.workflow_run workflow final on workflow.id = job.run_id
+        JOIN default.push on workflow.head_commit.'id' = push.head_commit.'id'
     where
         workflow.name = 'periodic'
         AND workflow.head_branch LIKE 'main'
+        and workflow.repository.'full_name' = 'pytorch/pytorch'
     group by
         job.head_sha,
-        push._event_time
+        push.head_commit.'timestamp'
     having
-        BOOL_AND(
+        groupBitAnd(
             job.conclusion = 'success'
             and job.conclusion is not null
-        )
+        ) = 1
     order by
-        push._event_time desc
+        push.head_commit.'timestamp' desc
     limit
         3
 ), workflow AS (
     SELECT
         id
     FROM
-        commons.workflow_run w
-        INNER JOIN good_periodic_sha c on w.head_sha = c.sha
-        and w.name = 'periodic'
+        default.workflow_run final
+    where
+        id in (
+            SELECT id FROM materialized_views.workflow_run_by_head_sha w
+            where head_sha in (select sha from good_periodic_sha)
+        )
+        and name = 'periodic'
 ), job AS (
     SELECT
         j.name,
         j.id,
-        j.run_id,
+        j.run_id
     FROM
-        commons.workflow_job j
-        INNER JOIN workflow w on w.id = j.run_id
+        default.workflow_job j final
+    where j.id in (
+        select id from materialized_views.workflow_job_by_head_sha
+        where head_sha in (select sha from good_periodic_sha)
+    )
+    and j.run_id in (select id from workflow)
 ), class_duration_per_job AS (
     SELECT
@@ -45,15 +53,15 @@ class_duration_per_job AS (
         test_run.invoking_file as file,
         test_run.classname as classname,
         SUM(time) as time,
         REGEXP_EXTRACT(job.name, '^(.*) /', 1) as base_name,
-        REGEXP_EXTRACT(job.name, '/ test \(([\w-]*),', 1) as test_config,
+        REGEXP_EXTRACT(job.name, '/ test \(([\w-]*),', 1) as test_config
     FROM
-        commons.test_run_summary test_run
-        /* `test_run` is ginormous and `job` is small, so lookup join is essential */
-        INNER JOIN job ON test_run.job_id = job.id HINT(join_strategy = lookup)
+        default.test_run_summary test_run
+        INNER JOIN job ON test_run.job_id = job.id
     WHERE
         /* cpp tests do not populate `file` for some reason. */
         /* Exclude them as we don't include them in our slow test infra */
-        test_run.file IS NOT NULL
+        test_run.file != ''
+        and test_run.workflow_id in (select id from workflow)
     GROUP BY
         test_run.invoking_file,
         test_run.classname,
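ClickHouse has no BOOL_AND aggregate, hence the groupBitAnd(...) = 1 rewrite above: each job's success becomes a 0/1 bit and the bits are AND-ed across the group, keeping only shas whose jobs all succeeded. The same logic in Python, on made-up conclusions:

    conclusions = ["success", "success", "failure"]

    # What BOOL_AND expressed: true iff every job on the commit succeeded.
    bool_and = all(c == "success" for c in conclusions)

    # What groupBitAnd(...) = 1 expresses: AND the 0/1 bits together.
    bits = 1
    for c in conclusions:
        bits &= int(c is not None and c == "success")

    assert (bits == 1) == bool_and  # both False here: one job failed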
diff --git a/torchci/clickhouse_queries/test_time_per_file/query.sql b/torchci/clickhouse_queries/test_time_per_file/query.sql
index e3153d7e20..2ced4a60c9 100644
--- a/torchci/clickhouse_queries/test_time_per_file/query.sql
+++ b/torchci/clickhouse_queries/test_time_per_file/query.sql
@@ -1,46 +1,49 @@
--- !!! Query is not converted to CH syntax yet. Delete this line when it gets converted
 WITH most_recent_strict_commits AS (
     SELECT
-        push.head_commit.id as sha,
+        push.head_commit.id as sha
     FROM
-        commons.push
+        default.push final
     WHERE
         push.ref = 'refs/heads/viable/strict'
         AND push.repository.full_name = 'pytorch/pytorch'
     ORDER BY
-        push._event_time DESC
+        push.head_commit.timestamp DESC
     LIMIT
         3
 ), workflow AS (
     SELECT
         id
     FROM
-        commons.workflow_run w
-        INNER JOIN most_recent_strict_commits c on w.head_sha = c.sha
+        materialized_views.workflow_run_by_head_sha w
+    where head_sha in (select sha from most_recent_strict_commits)
 ), job AS (
     SELECT
         j.name,
         j.id,
-        j.run_id
+        j.run_id
     FROM
-        commons.workflow_job j
-        INNER JOIN workflow w on w.id = j.run_id
+        default.workflow_job j final
+    where j.id in (
+        select id from materialized_views.workflow_job_by_head_sha
+        where head_sha in (select sha from most_recent_strict_commits)
+    )
+    and j.run_id in (select id from workflow)
 ), file_duration_per_job AS (
     SELECT
         test_run.invoking_file as file,
         SUM(time) as time,
         REGEXP_EXTRACT(job.name, '^(.*) /', 1) as base_name,
-        REGEXP_EXTRACT(job.name, '/ test \(([\w-]*),', 1) as test_config,
+        REGEXP_EXTRACT(job.name, '/ test \(([\w-]*),', 1) as test_config
     FROM
-        commons.test_run_summary test_run
-        /* `test_run` is ginormous and `job` is small, so lookup join is essential */
-        INNER JOIN job ON test_run.job_id = job.id HINT(join_strategy = lookup)
+        default.test_run_summary test_run
+        INNER JOIN job ON test_run.job_id = job.id
     WHERE
         /* cpp tests do not populate `file` for some reason. */
         /* Exclude them as we don't include them in our slow test infra */
-        test_run.file IS NOT NULL
+        test_run.file != ''
+        and test_run.workflow_id in (select id from workflow)
     GROUP BY
         test_run.invoking_file,
         base_name,
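The materialized_views.*_by_head_sha tables replace the old lookup-join HINT: job and workflow ids are pre-filtered by commit sha before the large test_run_summary table is touched. The same lookup can be issued directly through the new helper; a sketch built only from SQL already present in the queries above (whether it returns rows depends on live CI data and valid credentials):

    from torchci.clickhouse import query_clickhouse

    # Workflow run ids for the three most recent viable/strict commits,
    # mirroring the most_recent_strict_commits -> workflow CTE chain.
    run_ids = query_clickhouse(
        """
        SELECT id
        FROM materialized_views.workflow_run_by_head_sha
        WHERE head_sha IN (
            SELECT push.head_commit.id
            FROM default.push FINAL
            WHERE push.ref = 'refs/heads/viable/strict'
                AND push.repository.full_name = 'pytorch/pytorch'
            ORDER BY push.head_commit.timestamp DESC
            LIMIT 3
        )
        """,
        {},
    )
    print(len(run_ids), "workflow runs on the three newest strict commits")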
diff --git a/torchci/clickhouse_queries/test_time_per_file_periodic_jobs/query.sql b/torchci/clickhouse_queries/test_time_per_file_periodic_jobs/query.sql
index 84b0a09698..33de90fdb6 100644
--- a/torchci/clickhouse_queries/test_time_per_file_periodic_jobs/query.sql
+++ b/torchci/clickhouse_queries/test_time_per_file_periodic_jobs/query.sql
@@ -1,58 +1,66 @@
--- !!! Query is not converted to CH syntax yet. Delete this line when it gets converted
 -- same as test_time_per_file query except for the first select
 WITH good_periodic_sha AS (
     select
         job.head_sha as sha
     from
-        commons.workflow_job job
-        JOIN commons.workflow_run workflow on workflow.id = job.run_id
-        JOIN push on workflow.head_commit.id = push.head_commit.id
+        default.workflow_job job final
+        JOIN default.workflow_run workflow final on workflow.id = job.run_id
+        JOIN default.push on workflow.head_commit.'id' = push.head_commit.'id'
     where
         workflow.name = 'periodic'
        AND workflow.head_branch LIKE 'main'
+        and workflow.repository.'full_name' = 'pytorch/pytorch'
     group by
         job.head_sha,
-        push._event_time
+        push.head_commit.'timestamp'
     having
-        BOOL_AND(
+        groupBitAnd(
             job.conclusion = 'success'
             and job.conclusion is not null
-        )
+        ) = 1
     order by
-        push._event_time desc
+        push.head_commit.'timestamp' desc
     limit
         3
 ), workflow AS (
     SELECT
         id
     FROM
-        commons.workflow_run w
-        INNER JOIN good_periodic_sha c on w.head_sha = c.sha
-        and w.name = 'periodic'
+        default.workflow_run final
+    where
+        id in (
+            SELECT id FROM materialized_views.workflow_run_by_head_sha w
+            where head_sha in (select sha from good_periodic_sha)
+        )
+        and name = 'periodic'
 ), job AS (
     SELECT
         j.name,
         j.id,
-        j.run_id,
+        j.run_id
     FROM
-        commons.workflow_job j
-        INNER JOIN workflow w on w.id = j.run_id
+        default.workflow_job j final
+    where j.id in (
+        select id from materialized_views.workflow_job_by_head_sha
+        where head_sha in (select sha from good_periodic_sha)
+    )
+    and j.run_id in (select id from workflow)
 ), file_duration_per_job AS (
     SELECT
         test_run.invoking_file as file,
         SUM(time) as time,
         REGEXP_EXTRACT(job.name, '^(.*) /', 1) as base_name,
-        REGEXP_EXTRACT(job.name, '/ test \(([\w-]*),', 1) as test_config,
+        REGEXP_EXTRACT(job.name, '/ test \(([\w-]*),', 1) as test_config
     FROM
-        commons.test_run_summary test_run
-        /* `test_run` is ginormous and `job` is small, so lookup join is essential */
-        INNER JOIN job ON test_run.job_id = job.id HINT(join_strategy = lookup)
+        default.test_run_summary test_run
+        INNER JOIN job ON test_run.job_id = job.id
     WHERE
         /* cpp tests do not populate `file` for some reason. */
         /* Exclude them as we don't include them in our slow test infra */
-        test_run.file IS NOT NULL
+        test_run.file != ''
+        and test_run.workflow_id in (select id from workflow)
     GROUP BY
         test_run.invoking_file,
         base_name,