diff --git a/torchci/clickhouse_queries/oss_ci_benchmark_branches/params.json b/torchci/clickhouse_queries/oss_ci_benchmark_branches/params.json index 877c4606db..f54711f6a6 100644 --- a/torchci/clickhouse_queries/oss_ci_benchmark_branches/params.json +++ b/torchci/clickhouse_queries/oss_ci_benchmark_branches/params.json @@ -2,8 +2,8 @@ "deviceArch": "String", "dtypes": "Array(String)", "excludedMetrics": "Array(String)", - "filenames": "Array(String)", - "names": "Array(String)", + "benchmarks": "Array(String)", + "models": "Array(String)", "repo": "String", "startTime": "DateTime64(3)", "stopTime": "DateTime64(3)" diff --git a/torchci/clickhouse_queries/oss_ci_benchmark_branches/query.sql b/torchci/clickhouse_queries/oss_ci_benchmark_branches/query.sql index b9caf852d2..fffd8c9716 100644 --- a/torchci/clickhouse_queries/oss_ci_benchmark_branches/query.sql +++ b/torchci/clickhouse_queries/oss_ci_benchmark_branches/query.sql @@ -1,47 +1,66 @@ -- This query is used to get the list of branches and commits used by different -- OSS CI benchmark experiments. 
This powers HUD benchmarks dashboards +WITH benchmarks AS ( + SELECT + o.head_branch AS head_branch, + o.head_sha AS head_sha, + o.workflow_id AS id, + IF( + empty(o.runners), + tupleElement(o.benchmark, 'extra_info') [ 'device' ], + tupleElement(o.runners [ 1 ], 'name') + ) AS device, + IF( + empty(o.runners), + tupleElement(o.benchmark, 'extra_info') [ 'arch' ], + tupleElement(o.runners [ 1 ], 'type') + ) AS arch, + toStartOfDay(fromUnixTimestamp(o.timestamp)) AS event_time + FROM + benchmark.oss_ci_benchmark_v3 o + WHERE + o.timestamp >= toUnixTimestamp({startTime: DateTime64(3) }) + AND o.timestamp < toUnixTimestamp({stopTime: DateTime64(3) }) + AND o.repo = {repo: String } + AND ( + has({benchmarks: Array(String) }, o.benchmark.name) + OR empty({benchmarks: Array(String) }) + ) + -- NB: MODEL BACKEND is the display name format used by HUD for models + AND ( + has({models: Array(String) }, CONCAT(o.model.name, ' ', o.model.backend)) + OR empty({models: Array(String) }) + ) + AND ( + has({dtypes: Array(String) }, o.benchmark.dtype) + OR empty({dtypes: Array(String) }) + ) + AND ( + NOT has({excludedMetrics: Array(String) }, o.metric.name) + OR empty({excludedMetrics: Array(String) }) + ) + AND notEmpty(o.metric.name) + AND notEmpty(o.benchmark.dtype) +) SELECT - DISTINCT w.head_branch AS head_branch, - w.head_sha, - w.id, - toStartOfDay(fromUnixTimestamp64Milli(o.timestamp)) AS event_time, - o.filename + DISTINCT replaceOne(head_branch, 'refs/heads/', '') AS head_branch, + head_sha, + id, + event_time FROM - benchmark.oss_ci_benchmark_v2 o - LEFT JOIN default .workflow_run w FINAL ON o.workflow_id = w.id + benchmarks WHERE - o.timestamp >= toUnixTimestamp64Milli({startTime: DateTime64(3) }) - AND o.timestamp < toUnixTimestamp64Milli({stopTime: DateTime64(3) }) - AND ( - has({filenames: Array(String) }, o.filename) - OR empty({filenames: Array(String) }) - ) - AND ( - has({names: Array(String) }, o.name) - OR empty({names: Array(String) }) - ) -- NB: DEVICE (ARCH) is the display format used by HUD when grouping together these two fields - AND ( + ( CONCAT( - 
o.device, + device, ' (', - IF(empty(o.arch), 'NVIDIA A100-SXM4-40GB', o.arch), + IF(empty(arch), 'NVIDIA A100-SXM4-40GB', arch), ')' ) = {deviceArch: String } OR {deviceArch: String } = '' ) - AND ( - has({dtypes: Array(String) }, o.dtype) - OR empty({dtypes: Array(String) }) - ) - AND ( - NOT has({excludedMetrics: Array(String) }, o.metric) - OR empty({excludedMetrics: Array(String) }) - ) - AND notEmpty(o.metric) - AND w.html_url LIKE CONCAT('%', {repo: String }, '%') - AND notEmpty(o.dtype) - AND notEmpty(o.device) + AND notEmpty(device) ORDER BY - w.head_branch, + head_branch, event_time DESC diff --git a/torchci/clickhouse_queries/oss_ci_benchmark_llms/params.json b/torchci/clickhouse_queries/oss_ci_benchmark_llms/params.json index 9ecd4e5ad9..d0099c2733 100644 --- a/torchci/clickhouse_queries/oss_ci_benchmark_llms/params.json +++ b/torchci/clickhouse_queries/oss_ci_benchmark_llms/params.json @@ -4,10 +4,9 @@ "deviceArch": "String", "dtypes": "Array(String)", "excludedMetrics": "Array(String)", - "filenames": "Array(String)", - "getJobId": "Bool", + "benchmarks": "Array(String)", "granularity": "String", - "names": "Array(String)", + "models": "Array(String)", "repo": "String", "startTime": "DateTime64(3)", "stopTime": "DateTime64(3)" diff --git a/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql b/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql index a9b77ce6b6..31d34063ea 100644 --- a/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql +++ b/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql @@ -1,66 +1,97 @@ -- This query is used to get the LLMs benchmark results from different experiments. It -- queries the TPS and memory bandwidth for each model / quantization combos. 
This powers -- the LLMs benchmark dashboard +WITH benchmarks AS ( + SELECT + replaceOne(o.head_branch, 'refs/heads/', '') AS head_branch, + o.workflow_id AS workflow_id, + o.job_id AS job_id, + o.model.name AS model, + o.model.backend AS backend, + o.metric.name AS metric, + floor(arrayAvg(o.metric.benchmark_values), 2) AS actual, + floor(toFloat64(o.metric.target_value), 2) AS target, + o.benchmark.dtype AS dtype, + IF( + empty(o.runners), + tupleElement(o.benchmark, 'extra_info') [ 'device' ], + tupleElement(o.runners [ 1 ], 'name') + ) AS device, + IF( + empty(o.runners), + tupleElement(o.benchmark, 'extra_info') [ 'arch' ], + tupleElement(o.runners [ 1 ], 'type') + ) AS arch, + DATE_TRUNC( + {granularity: String }, + fromUnixTimestamp(o.timestamp) + ) AS granularity_bucket + FROM + benchmark.oss_ci_benchmark_v3 o + WHERE + o.timestamp >= toUnixTimestamp({startTime: DateTime64(3) }) + AND o.timestamp < toUnixTimestamp({stopTime: DateTime64(3) }) + AND o.repo = {repo: String } + AND ( + has({commits: Array(String) }, o.head_sha) + OR empty({commits: Array(String) }) + ) + AND ( + has({benchmarks: Array(String) }, o.benchmark.name) + OR empty({benchmarks: Array(String) }) + ) + -- NB: MODEL BACKEND is the display name format used by HUD for models + AND ( + has({models: Array(String) }, CONCAT(o.model.name, ' ', o.model.backend)) + OR empty({models: Array(String) }) + ) + AND ( + has({dtypes: Array(String) }, o.benchmark.dtype) + OR empty({dtypes: Array(String) }) + ) + AND ( + NOT has({excludedMetrics: Array(String) }, o.metric.name) + OR empty({excludedMetrics: Array(String) }) + ) + AND notEmpty(o.metric.name) + AND notEmpty(o.benchmark.dtype) +) SELECT - DISTINCT o.workflow_id AS workflow_id, - -- As the JSON response is pretty big, only return the field if it's needed - IF({getJobId: Bool}, o.job_id, '') AS job_id, - o.name, - o.metric, - floor(toFloat64(o.actual), 2) AS actual, - floor(toFloat64(o.target), 2) AS target, - DATE_TRUNC( - {granularity: String }, - fromUnixTimestamp64Milli(o.timestamp) - ) AS granularity_bucket, - o.dtype, - o.device, - -- NB: 
Default to NVIDIA A100-SXM4-40GB for old records without arch column - IF(empty(o.arch), 'NVIDIA A100-SXM4-40GB', o.arch) as arch + DISTINCT workflow_id, + job_id, + CONCAT(model, ' ', backend) AS name, + metric, + actual, + target, + dtype, + device, + arch, + granularity_bucket FROM - benchmark.oss_ci_benchmark_v2 o - LEFT JOIN default .workflow_run w FINAL ON o.workflow_id = w.id + benchmarks WHERE - o.timestamp >= toUnixTimestamp64Milli({startTime: DateTime64(3) }) - AND o.timestamp < toUnixTimestamp64Milli({stopTime: DateTime64(3) }) - AND ( - has({branches: Array(String) }, w.head_branch) - OR empty({branches: Array(String) }) - ) - AND ( - has({commits: Array(String) }, w.head_sha) - OR empty({commits: Array(String) }) - ) - AND ( - has({filenames: Array(String) }, o.filename) - OR empty({filenames: Array(String) }) + ( + has({models: Array(String) }, CONCAT(model, ' ', backend)) + OR empty({models: Array(String) }) ) AND ( - has({names: Array(String) }, o.name) - OR empty({names: Array(String) }) + has({branches: Array(String) }, head_branch) + OR empty({branches: Array(String) }) ) -- NB: DEVICE (ARCH) is the display format used by HUD when grouping together these two fields AND ( CONCAT( - o.device, + device, ' (', - IF(empty(o.arch), 'NVIDIA A100-SXM4-40GB', o.arch), + IF(empty(arch), 'NVIDIA A100-SXM4-40GB', arch), ')' ) = {deviceArch: String } OR {deviceArch: String } = '' ) - AND ( - has({dtypes: Array(String) }, o.dtype) - OR empty({dtypes: Array(String) }) - ) - AND ( - NOT has({excludedMetrics: Array(String) }, o.metric) - OR empty({excludedMetrics: Array(String) }) - ) - AND notEmpty(o.metric) - AND notEmpty(o.dtype) - AND notEmpty(o.device) - AND w.html_url LIKE CONCAT('%', {repo: String }, '%') + AND notEmpty(device) ORDER BY granularity_bucket DESC, workflow_id DESC, diff --git a/torchci/clickhouse_queries/oss_ci_benchmark_names/params.json b/torchci/clickhouse_queries/oss_ci_benchmark_names/params.json index 877c4606db..f54711f6a6 100644 --- 
a/torchci/clickhouse_queries/oss_ci_benchmark_names/params.json +++ b/torchci/clickhouse_queries/oss_ci_benchmark_names/params.json @@ -2,8 +2,8 @@ "deviceArch": "String", "dtypes": "Array(String)", "excludedMetrics": "Array(String)", - "filenames": "Array(String)", - "names": "Array(String)", + "benchmarks": "Array(String)", + "models": "Array(String)", "repo": "String", "startTime": "DateTime64(3)", "stopTime": "DateTime64(3)" diff --git a/torchci/clickhouse_queries/oss_ci_benchmark_names/query.sql b/torchci/clickhouse_queries/oss_ci_benchmark_names/query.sql index 18ffc97d43..6af360e113 100644 --- a/torchci/clickhouse_queries/oss_ci_benchmark_names/query.sql +++ b/torchci/clickhouse_queries/oss_ci_benchmark_names/query.sql @@ -1,51 +1,72 @@ -- This query is used by HUD benchmarks dashboards to get the list of experiment names +WITH benchmarks AS ( + SELECT + o.benchmark.name AS benchmark, + o.model.name AS model, + o.model.backend AS backend, + o.metric.name AS metric, + o.benchmark.dtype AS dtype, + IF( + empty(o.runners), + tupleElement(o.benchmark, 'extra_info') [ 'device' ], + tupleElement(o.runners [ 1 ], 'name') + ) AS device, + IF( + empty(o.runners), + tupleElement(o.benchmark, 'extra_info') [ 'arch' ], + tupleElement(o.runners [ 1 ], 'type') + ) AS arch + FROM + benchmark.oss_ci_benchmark_v3 o + WHERE + o.timestamp >= toUnixTimestamp({startTime: DateTime64(3) }) + AND o.timestamp < toUnixTimestamp({stopTime: DateTime64(3) }) + AND o.repo = {repo: String } + AND ( + has({benchmarks: Array(String) }, o.benchmark.name) + OR empty({benchmarks: Array(String) }) + ) + -- NB: MODEL BACKEND is the display name format used by HUD for models + AND ( + has({models: Array(String) }, CONCAT(o.model.name, ' ', o.model.backend)) + OR empty({models: Array(String) }) + ) + AND ( + has({dtypes: Array(String) }, o.benchmark.dtype) + OR empty({dtypes: Array(String) }) + ) + AND ( + NOT has({excludedMetrics: Array(String) }, o.metric.name) + OR empty({excludedMetrics: Array(String) }) + ) + AND notEmpty(o.metric.name) + AND notEmpty(o.benchmark.dtype) +) SELECT - DISTINCT 
o.filename AS filename, - o.name, - o.metric, - o.dtype, - o.device, - -- NB: Default to NVIDIA A100-SXM4-40GB for old records without arch column - IF(empty(o.arch), 'NVIDIA A100-SXM4-40GB', o.arch) AS arch + DISTINCT benchmark, + CONCAT(model, ' ', backend) AS name, + metric, + dtype, + device, + arch FROM - benchmark.oss_ci_benchmark_v2 o - LEFT JOIN default .workflow_run w FINAL ON o.workflow_id = w.id + benchmarks WHERE - o.timestamp >= toUnixTimestamp64Milli({startTime: DateTime64(3) }) - AND o.timestamp < toUnixTimestamp64Milli({stopTime: DateTime64(3) }) - AND ( - has({filenames: Array(String) }, o.filename) - OR empty({filenames: Array(String) }) - ) - AND ( - has({names: Array(String) }, o.name) - OR empty({names: Array(String) }) - ) -- NB: DEVICE (ARCH) is the display format used by HUD when grouping together these two fields - AND ( + ( CONCAT( - o.device, + device, ' (', - IF(empty(o.arch), 'NVIDIA A100-SXM4-40GB', o.arch), + IF(empty(arch), 'NVIDIA A100-SXM4-40GB', arch), ')' ) = {deviceArch: String } OR {deviceArch: String } = '' ) - AND ( - has({dtypes: Array(String) }, o.dtype) - OR empty({dtypes: Array(String) }) - ) - AND ( - NOT has({excludedMetrics: Array(String) }, o.metric) - OR empty({excludedMetrics: Array(String) }) - ) - AND notEmpty(o.metric) - AND w.html_url LIKE CONCAT('%', {repo: String }, '%') - AND notEmpty(o.dtype) - AND notEmpty(o.device) + AND notEmpty(device) ORDER BY - o.filename, - o.name, - o.metric, - o.dtype, - o.device + benchmark, + name, + metric, + dtype, + device diff --git a/torchci/components/benchmark/llms/common.tsx b/torchci/components/benchmark/llms/common.tsx index 2109a87619..790e09e983 100644 --- a/torchci/components/benchmark/llms/common.tsx +++ b/torchci/components/benchmark/llms/common.tsx @@ -2,8 +2,8 @@ import { BranchAndCommit } from "lib/types"; export const REPOS = ["pytorch/pytorch", "pytorch/executorch"]; export const REPO_TO_BENCHMARKS: { [k: string]: string[] } = { - "pytorch/pytorch": 
["gpt_fast_benchmark"], - "pytorch/executorch": ["android-perf", "apple-perf"], + "pytorch/pytorch": ["PyTorch gpt-fast benchmark"], + "pytorch/executorch": ["ExecuTorch"], }; export const EXCLUDED_METRICS: string[] = ["load_status"]; export const DEFAULT_MODEL_NAME = "All Models"; diff --git a/torchci/lib/benchmark/llmUtils.ts b/torchci/lib/benchmark/llmUtils.ts index fbdc4e22a0..464e6d9b51 100644 --- a/torchci/lib/benchmark/llmUtils.ts +++ b/torchci/lib/benchmark/llmUtils.ts @@ -17,14 +17,12 @@ export function useBenchmark( const queryCollection = "benchmarks"; const queryName = "oss_ci_benchmark_llms"; + // Copy queryParams so the writes below never mutate the caller's object const queryParamsWithBranchAndCommit: { [key: string]: any } = { - getJobId: getJobId, ...queryParams, }; - (queryParamsWithBranchAndCommit as { [key: string]: any })["branches"] = branchAndCommit.branch ? [branchAndCommit.branch] : []; - (queryParamsWithBranchAndCommit as { [key: string]: any })["commits"] = branchAndCommit.commit ? [branchAndCommit.commit] : []; diff --git a/torchci/pages/benchmark/llms.tsx b/torchci/pages/benchmark/llms.tsx index 7dab5724d1..07b0e5aae3 100644 --- a/torchci/pages/benchmark/llms.tsx +++ b/torchci/pages/benchmark/llms.tsx @@ -235,9 +235,9 @@ export default function Page() { deviceArch: deviceName === DEFAULT_DEVICE_NAME ? "" : deviceName, dtypes: dtypeName === DEFAULT_DTYPE_NAME ? [] : [dtypeName], excludedMetrics: EXCLUDED_METRICS, - filenames: REPO_TO_BENCHMARKS[repoName], + benchmarks: REPO_TO_BENCHMARKS[repoName], granularity: granularity, - names: modelName === DEFAULT_MODEL_NAME ? [] : [modelName], + models: modelName === DEFAULT_MODEL_NAME ? [] : [modelName], repo: repoName, startTime: dayjs(startTime).utc().format("YYYY-MM-DDTHH:mm:ss.SSS"), stopTime: dayjs(stopTime).utc().format("YYYY-MM-DDTHH:mm:ss.SSS"),