[ch] update test times #5663

Merged: 2 commits, merged Sep 16, 2024
12 changes: 10 additions & 2 deletions .github/workflows/update-test-times.yml
@@ -23,14 +23,22 @@ jobs:
steps:
- uses: actions/checkout@v3

- name: Set up python 3.10
uses: actions/setup-python@v2
clee2000 marked this conversation as resolved.
with:
python-version: '3.10'

- name: Install Dependencies
run: pip3 install boto3==1.19.12 rockset==1.0.3
run: python -m pip install boto3==1.19.12 clickhouse-connect==0.7.16 requests==2.26.0
Contributor:

Can you please explain why changing python to python3 is necessary?

Suggested change
run: python -m pip install boto3==1.19.12 clickhouse-connect==0.7.16 requests==2.26.0
run: python3 -m pip install boto3==1.19.12 clickhouse-connect==0.7.16 requests==2.26.0

Contributor Author:

I switched from python3 to python since I added the setup-python step, which makes the default python the version installed by that step


- name: Update test times
run: |
python3 -m torchci.update_test_times
python -m torchci.update_test_times
env:
ROCKSET_API_KEY: ${{ secrets.ROCKSET_API_KEY }}
CLICKHOUSE_ENDPOINT: ${{ secrets.CLICKHOUSE_HUD_USER_URL }}
CLICKHOUSE_USERNAME: ${{ secrets.CLICKHOUSE_HUD_USER_USERNAME }}
CLICKHOUSE_PASSWORD: ${{ secrets.CLICKHOUSE_HUD_USER_PASSWORD }}

- name: Push test file times file to this repository
if: github.event_name != 'pull_request'
49 changes: 49 additions & 0 deletions tools/torchci/clickhouse.py
@@ -0,0 +1,49 @@
import json
import os
from functools import lru_cache
from typing import Any, Dict

import clickhouse_connect
from torchci.utils import REPO_ROOT


@lru_cache(maxsize=1)
def get_clickhouse_client() -> Any:
endpoint = os.environ["CLICKHOUSE_ENDPOINT"]
# I cannot figure out why these values aren't being handled automatically
# when it is fine in the lambda
if endpoint.startswith("https://"):
Contributor (@huydhn, Sep 16, 2024):

You might want to use something like https://docs.python.org/3/library/urllib.parse.html to get the hostname, scheme, and port number

Contributor Author (@clee2000, Sep 16, 2024):

I did some testing right now, and it doesn't seem to handle having vs not having https:// well

I also found this in the docs
"Following the syntax specifications in RFC 1808, urlparse recognizes a netloc only if it is properly introduced by ‘//’. Otherwise the input is presumed to be a relative URL and thus to start with a path component."
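The urlparse behavior discussed above can be checked directly. This sketch is not part of the PR; the hostname used is invented for illustration:

```python
from urllib.parse import urlparse

# With an explicit scheme, hostname and port come out as expected.
full = urlparse("https://example.clickhouse.cloud:8443")
print(full.hostname, full.port)  # example.clickhouse.cloud 8443

# Without the "//" introducing a netloc, RFC 1808 says the input is a
# relative URL, so nothing lands in netloc and hostname is None.
bare = urlparse("example.clickhouse.cloud:8443")
print(bare.hostname)  # None
```

This is why the PR falls back to manual prefix/suffix stripping rather than parsing the endpoint with urlparse.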

endpoint = endpoint[len("https://") :]
if endpoint.endswith(":8443"):
endpoint = endpoint[: -len(":8443")]
return clickhouse_connect.get_client(
host=endpoint,
user=os.environ["CLICKHOUSE_USERNAME"],
password=os.environ["CLICKHOUSE_PASSWORD"],
secure=True,
interface="https",
port=8443,
)


def query_clickhouse_saved(queryName: str, inputParams: Dict[str, Any]) -> Any:
path = REPO_ROOT / "torchci" / "clickhouse_queries" / queryName
with open(path / "query.sql") as f:
queryText = f.read()
with open(path / "params.json") as f:
paramsText = json.load(f)

queryParams = {name: inputParams[name] for name in paramsText}
return query_clickhouse(queryText, queryParams)


def query_clickhouse(query: str, params: Dict[str, Any]) -> Any:
res = get_clickhouse_client().query(query, params)
json_res = []
# convert to json
for row in res.result_rows:
Contributor:

Interesting, so the clickhouse_connect Python client uses this format while the TypeScript one already does the column name mapping. Maybe there is an option in clickhouse_connect, but I haven't been able to find it in https://clickhouse.com/docs/en/integrations/python

json_row = {}
for i, column in enumerate(res.column_names):
json_row[column] = row[i]
json_res.append(json_row)
return json_res
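The row-to-dict conversion in `query_clickhouse` can be illustrated standalone. The column names and rows below are made up for the sketch, not real query output:

```python
# Mirror of the conversion loop above: pair each result-row tuple with
# the column names so every row becomes a plain, JSON-friendly dict.
column_names = ["base_name", "test_config", "file", "time"]
result_rows = [
    ("linux-jammy", "default", "test_ops", 12.5),
    ("linux-jammy", "slow", "test_nn", 3.0),
]

json_res = []
for row in result_rows:
    json_row = {column: value for column, value in zip(column_names, row)}
    json_res.append(json_row)

print(json_res[0])
# {'base_name': 'linux-jammy', 'test_config': 'default', 'file': 'test_ops', 'time': 12.5}
```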
1 change: 1 addition & 0 deletions tools/torchci/requirements.txt
@@ -3,3 +3,4 @@ requests
rockset==1.0.3
boto3==1.19.12
pytest==7.2.0
clickhouse-connect==0.7.16
49 changes: 15 additions & 34 deletions tools/torchci/update_test_times.py
@@ -1,14 +1,9 @@
import json
import os
from collections import defaultdict
from pathlib import Path

import requests
import rockset
from torchci.clickhouse import query_clickhouse_saved

REPO_ROOT = Path(__file__).resolve().parent.parent.parent

PROD_VERSIONS_FILE = REPO_ROOT / "torchci" / "rockset" / "prodVersions.json"
TEST_TIMES_URL = "https://raw.githubusercontent.com/pytorch/test-infra/generated-stats/stats/test-times.json"
TEST_CLASS_TIMES_URL = "https://raw.githubusercontent.com/pytorch/test-infra/generated-stats/stats/test-class-times.json"

@@ -18,15 +13,15 @@
TEST_TIME_PER_CLASS_PERIODIC_JOBS_QUERY_NAME = "test_time_per_class_periodic_jobs"


def get_file_data_from_rockset():
return get_data_from_rockset(file_mode=True)
def get_file_data_from_clickhouse():
clee2000 marked this conversation as resolved.
return get_data_from_clickhouse(file_mode=True)


def get_class_data_from_rockset():
return get_data_from_rockset(file_mode=False)
def get_class_data_from_clickhouse():
return get_data_from_clickhouse(file_mode=False)


def get_data_from_rockset(file_mode: bool):
def get_data_from_clickhouse(file_mode: bool):
general_query_name = (
TEST_TIME_PER_FILE_QUERY_NAME if file_mode else TEST_TIME_PER_CLASS_QUERY_NAME
)
@@ -36,23 +31,9 @@ def get_data_from_rockset(file_mode: bool):
else TEST_TIME_PER_CLASS_PERIODIC_JOBS_QUERY_NAME
)

rockset_client = rockset.RocksetClient(
host="api.usw2a1.rockset.com", api_key=os.environ["ROCKSET_API_KEY"]
)
with open(PROD_VERSIONS_FILE) as f:
prod_versions = json.load(f)

rockset_result = rockset_client.QueryLambdas.execute_query_lambda(
query_lambda=general_query_name,
version=prod_versions["commons"][general_query_name],
workspace="commons",
).results
periodic_rockset_result = rockset_client.QueryLambdas.execute_query_lambda(
query_lambda=periodic_query_name,
version=prod_versions["commons"][periodic_query_name],
workspace="commons",
).results
return rockset_result + periodic_rockset_result
clickhouse_result = query_clickhouse_saved(general_query_name, {})
periodic_clickhouse_result = query_clickhouse_saved(periodic_query_name, {})
return clickhouse_result + periodic_clickhouse_result


def download_old_test_file_times():
@@ -95,15 +76,15 @@ def convert_test_class_times_to_default_dict(d):
return new_d


def gen_test_file_times(rockset_results, old_test_times):
def gen_test_file_times(clickhouse_results, old_test_times):
# Use old test times because sometimes we want to manually edit the test
# times json and want those changes to persist. Unfortunately this means
# that the test times json grows and never shrinks, but we can edit the json
# to make it smaller. Defaults are always overriden.
test_times = convert_test_file_times_to_default_dict(old_test_times)
test_times_no_build_env = defaultdict(lambda: defaultdict(list))
test_times_no_test_config = defaultdict(list)
for row in rockset_results:
for row in clickhouse_results:
test_times[row["base_name"]][row["test_config"]][row["file"]] = row["time"]
test_times_no_build_env[row["test_config"]][row["file"]].append(row["time"])
test_times_no_test_config[row["file"]].append(row["time"])
@@ -123,7 +104,7 @@ def gen_test_file_times(rockset_results, old_test_times):
return test_times
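The nested mapping that `gen_test_file_times` fills can be sketched with invented rows; the keys follow the query columns (`base_name`, `test_config`, `file`) and the sample values are hypothetical:

```python
from collections import defaultdict

# test_times[base_name][test_config][file] = seconds, mirroring the
# structure the function builds from the query results.
test_times = defaultdict(lambda: defaultdict(dict))
rows = [
    {"base_name": "linux-jammy", "test_config": "default", "file": "test_ops", "time": 12.5},
    {"base_name": "linux-jammy", "test_config": "default", "file": "test_nn", "time": 3.0},
]
for row in rows:
    test_times[row["base_name"]][row["test_config"]][row["file"]] = row["time"]

print(test_times["linux-jammy"]["default"]["test_ops"])  # 12.5
```

Because later rows overwrite earlier ones for the same key path, fresh query results always override whatever defaults were carried over from the old test-times JSON.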


def gen_test_class_times(rockset_results, old_test_times):
def gen_test_class_times(clickhouse_results, old_test_times):
# Use old test times because sometimes we want to manually edit the test
# times json and want those changes to persist. Unfortunately this means
# that the test times json grows and never shrinks, but we can edit the json
@@ -134,7 +115,7 @@ def gen_test_class_times(rockset_results, old_test_times):
lambda: defaultdict(lambda: defaultdict(list))
)
test_times_no_test_config = defaultdict(lambda: defaultdict(list))
for row in rockset_results:
for row in clickhouse_results:
test_times[row["base_name"]][row["test_config"]][row["file"]][
row["classname"]
] = row["time"]
@@ -164,14 +145,14 @@

def main() -> None:
test_file_times = gen_test_file_times(
get_file_data_from_rockset(), download_old_test_file_times()
get_file_data_from_clickhouse(), download_old_test_file_times()
)

with open("test-times.json", "w") as f:
f.write(json.dumps(test_file_times, indent=2, sort_keys=True))

test_class_times = gen_test_class_times(
get_class_data_from_rockset(), download_old_test_class_times()
get_class_data_from_clickhouse(), download_old_test_class_times()
)

with open("test-class-times.json", "w") as f:
31 changes: 17 additions & 14 deletions torchci/clickhouse_queries/test_time_per_class/query.sql
@@ -1,47 +1,50 @@
-- !!! Query is not converted to CH syntax yet. Delete this line when it gets converted
WITH most_recent_strict_commits AS (
SELECT
push.head_commit.id as sha,
push.head_commit.id as sha
FROM
commons.push
default.push final
WHERE
push.ref = 'refs/heads/viable/strict'
AND push.repository.full_name = 'pytorch/pytorch'
ORDER BY
push._event_time DESC
push.head_commit.timestamp DESC
LIMIT
3
), workflow AS (
SELECT
id
FROM
commons.workflow_run w
INNER JOIN most_recent_strict_commits c on w.head_sha = c.sha
materialized_views.workflow_run_by_head_sha w
where head_sha in (select sha from most_recent_strict_commits)
),
job AS (
SELECT
j.name,
j.id,
j.run_id
j.run_id
FROM
commons.workflow_job j
INNER JOIN workflow w on w.id = j.run_id
default.workflow_job j final
where j.id in (
select id from materialized_views.workflow_job_by_head_sha
where head_sha in (select sha from most_recent_strict_commits)
)
and j.run_id in (select id from workflow)
),
class_duration_per_job AS (
SELECT
test_run.invoking_file as file,
test_run.classname as classname,
SUM(time) as time,
REGEXP_EXTRACT(job.name, '^(.*) /', 1) as base_name,
REGEXP_EXTRACT(job.name, '/ test \(([\w-]*),', 1) as test_config,
REGEXP_EXTRACT(job.name, '/ test \(([\w-]*),', 1) as test_config
FROM
commons.test_run_summary test_run
/* `test_run` is ginormous and `job` is small, so lookup join is essential */
INNER JOIN job ON test_run.job_id = job.id HINT(join_strategy = lookup)
default.test_run_summary test_run
INNER JOIN job ON test_run.job_id = job.id
WHERE
/* cpp tests do not populate `file` for some reason. */
/* Exclude them as we don't include them in our slow test infra */
test_run.file IS NOT NULL
test_run.file != ''
and test_run.workflow_id in (select id from workflow)
GROUP BY
test_run.invoking_file,
test_run.classname,
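The two `REGEXP_EXTRACT` calls in the query split a job name into a base name and a test config. An equivalent check in Python, with an invented sample job name:

```python
import re

# Job names look like "<base> / test (<config>, <shard>, <num_shards>, <runner>)".
name = "linux-focal-py3.8 / test (default, 1, 3, linux.2xlarge)"

# '^(.*) /' is greedy, so base_name is everything before the last " /".
base_name = re.search(r"^(.*) /", name).group(1)
# '/ test \(([\w-]*),' captures the first comma-separated field in parens.
test_config = re.search(r"/ test \(([\w-]*),", name).group(1)

print(base_name, test_config)  # linux-focal-py3.8 default
```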
@@ -1,59 +1,67 @@
-- !!! Query is not converted to CH syntax yet. Delete this line when it gets converted
-- same as test_time_per_class query except for the first select
-- same as test_time_per_file query except for the first select
WITH good_periodic_sha AS (
select
job.head_sha as sha
from
commons.workflow_job job
JOIN commons.workflow_run workflow on workflow.id = job.run_id
JOIN push on workflow.head_commit.id = push.head_commit.id
default.workflow_job job final
JOIN default.workflow_run workflow final on workflow.id = job.run_id
JOIN default.push on workflow.head_commit.'id' = push.head_commit.'id'
where
workflow.name = 'periodic'
AND workflow.head_branch LIKE 'main'
and workflow.repository.'full_name' = 'pytorch/pytorch'
group by
job.head_sha,
push._event_time
push.head_commit.'timestamp'
having
BOOL_AND(
groupBitAnd(
job.conclusion = 'success'
and job.conclusion is not null
)
) = 1
order by
push._event_time desc
push.head_commit.'timestamp' desc
limit
3
), workflow AS (
SELECT
id
FROM
commons.workflow_run w
INNER JOIN good_periodic_sha c on w.head_sha = c.sha
and w.name = 'periodic'
default.workflow_run final
where
id in (
SELECT id FROM materialized_views.workflow_run_by_head_sha w
where head_sha in (select sha from good_periodic_sha)
)
and name = 'periodic'
),
job AS (
SELECT
j.name,
j.id,
j.run_id,
j.run_id
FROM
commons.workflow_job j
INNER JOIN workflow w on w.id = j.run_id
default.workflow_job j final
where j.id in (
select id from materialized_views.workflow_job_by_head_sha
where head_sha in (select sha from good_periodic_sha)
)
and j.run_id in (select id from workflow)
),
class_duration_per_job AS (
SELECT
test_run.invoking_file as file,
test_run.classname as classname,
SUM(time) as time,
REGEXP_EXTRACT(job.name, '^(.*) /', 1) as base_name,
REGEXP_EXTRACT(job.name, '/ test \(([\w-]*),', 1) as test_config,
REGEXP_EXTRACT(job.name, '/ test \(([\w-]*),', 1) as test_config
FROM
commons.test_run_summary test_run
/* `test_run` is ginormous and `job` is small, so lookup join is essential */
INNER JOIN job ON test_run.job_id = job.id HINT(join_strategy = lookup)
default.test_run_summary test_run
INNER JOIN job ON test_run.job_id = job.id
WHERE
/* cpp tests do not populate `file` for some reason. */
/* Exclude them as we don't include them in our slow test infra */
test_run.file IS NOT NULL
test_run.file != ''
and test_run.workflow_id in (select id from workflow)
GROUP BY
test_run.invoking_file,
test_run.classname,