Skip to content

Commit

Permalink
Migrate LLM dashboards to v3 (#6057)
Browse files Browse the repository at this point in the history
This migrates the queries from `oss_ci_benchmark_v2` to
`oss_ci_benchmark_v3`. This will unblock the following updates:

* Add delegation backend support for [ExecuTorch
dashboard](https://hud.pytorch.org/benchmark/llms?repoName=pytorch%2Fexecutorch)
instead of bundling it together with the model name `edsr coreml_all`
(cc @guangy10)
* Add LLM AO dashboard, whose data is only available in
`oss_ci_benchmark_v3` (cc @jerryzh168)

Some minor fixes that go with this:

* Change the parameters to `benchmarks` and `models`. This is clearer
than `filenames` and `names`
* Remove the `getJobId` param. It originated in the TorchInductor query,
where the job ID is a string. In `oss_ci_benchmark_v3`, the job ID is a
UInt64, so omitting it saves nothing.

### Testing

*
https://torchci-git-fork-huydhn-migrate-llm-dashboard-v3-fbopensource.vercel.app/benchmark/llms?repoName=pytorch%2Fpytorch
*
https://torchci-git-fork-huydhn-migrate-llm-dashboard-v3-fbopensource.vercel.app/benchmark/llms?repoName=pytorch%2Fexecutorch
  • Loading branch information
huydhn authored Dec 13, 2024
1 parent 1577e6b commit 28ccdef
Show file tree
Hide file tree
Showing 9 changed files with 193 additions and 135 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
"deviceArch": "String",
"dtypes": "Array(String)",
"excludedMetrics": "Array(String)",
"filenames": "Array(String)",
"names": "Array(String)",
"benchmarks": "Array(String)",
"models": "Array(String)",
"repo": "String",
"startTime": "DateTime64(3)",
"stopTime": "DateTime64(3)"
Expand Down
84 changes: 51 additions & 33 deletions torchci/clickhouse_queries/oss_ci_benchmark_branches/query.sql
Original file line number Diff line number Diff line change
@@ -1,47 +1,65 @@
-- This query is used to get the list of branches and commits used by different
-- OSS CI benchmark experiments. This powers HUD benchmarks dashboards
WITH benchmarks AS (
-- Pre-filter oss_ci_benchmark_v3 records for the requested repo and time
-- window, and derive the device / arch / event_time columns that the outer
-- query filters and sorts on.
SELECT
o.head_branch AS head_branch,
o.head_sha AS head_sha,
o.workflow_id AS id,
-- Device: when the record has no runners, fall back to the 'device' entry
-- of benchmark.extra_info; otherwise use the name of the runner at index 1
-- (the first element, as ClickHouse arrays are 1-based).
IF(
empty(o.runners),
tupleElement(o.benchmark, 'extra_info') [ 'device' ],
tupleElement(o.runners [ 1 ], 'name')
) AS device,
-- Arch: same fallback rule as device, using extra_info['arch'] or the
-- 'type' of the first runner.
IF(
empty(o.runners),
tupleElement(o.benchmark, 'extra_info') [ 'arch' ],
tupleElement(o.runners [ 1 ], 'type')
) AS arch,
-- Truncate the record's Unix timestamp to the start of its day.
toStartOfDay(fromUnixTimestamp(o.timestamp)) AS event_time
FROM
benchmark.oss_ci_benchmark_v3 o
WHERE
-- Half-open time window: startTime inclusive, stopTime exclusive.
o.timestamp >= toUnixTimestamp({startTime: DateTime64(3) })
AND o.timestamp < toUnixTimestamp({stopTime: DateTime64(3) })
AND o.repo = {repo: String }
-- For each array parameter below, an empty array means "do not filter".
AND (
has({benchmarks: Array(String) }, o.benchmark.name)
OR empty({benchmarks: Array(String) })
)
AND (
has({models: Array(String) }, o.model.name)
OR empty({models: Array(String) })
)
AND (
has({dtypes: Array(String) }, o.benchmark.dtype)
OR empty({dtypes: Array(String) })
)
-- Drop records whose metric name is listed in excludedMetrics; an empty
-- list excludes nothing.
AND (
NOT has({excludedMetrics: Array(String) }, o.metric.name)
OR empty({excludedMetrics: Array(String) })
)
-- Skip records with no metric name or no dtype.
AND notEmpty(o.metric.name)
AND notEmpty(o.benchmark.dtype)
)
SELECT
DISTINCT w.head_branch AS head_branch,
w.head_sha,
w.id,
toStartOfDay(fromUnixTimestamp64Milli(o.timestamp)) AS event_time,
o.filename
DISTINCT replaceOne(head_branch, 'refs/heads/', '') AS head_branch,
head_sha,
id,
event_time
FROM
benchmark.oss_ci_benchmark_v2 o
LEFT JOIN default .workflow_run w FINAL ON o.workflow_id = w.id
benchmarks
WHERE
o.timestamp >= toUnixTimestamp64Milli({startTime: DateTime64(3) })
AND o.timestamp < toUnixTimestamp64Milli({stopTime: DateTime64(3) })
AND (
has({filenames: Array(String) }, o.filename)
OR empty({filenames: Array(String) })
)
AND (
has({names: Array(String) }, o.name)
OR empty({names: Array(String) })
)
-- NB: DEVICE (ARCH) is the display format used by HUD when grouping together these two fields
AND (
(
CONCAT(
o.device,
device,
' (',
IF(empty(o.arch), 'NVIDIA A100-SXM4-40GB', o.arch),
IF(empty(arch), 'NVIDIA A100-SXM4-40GB', arch),
')'
) = {deviceArch: String }
OR {deviceArch: String } = ''
)
AND (
has({dtypes: Array(String) }, o.dtype)
OR empty({dtypes: Array(String) })
)
AND (
NOT has({excludedMetrics: Array(String) }, o.metric)
OR empty({excludedMetrics: Array(String) })
)
AND notEmpty(o.metric)
AND w.html_url LIKE CONCAT('%', {repo: String }, '%')
AND notEmpty(o.dtype)
AND notEmpty(o.device)
AND notEmpty(device)
ORDER BY
w.head_branch,
head_branch,
event_time DESC
5 changes: 2 additions & 3 deletions torchci/clickhouse_queries/oss_ci_benchmark_llms/params.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@
"deviceArch": "String",
"dtypes": "Array(String)",
"excludedMetrics": "Array(String)",
"filenames": "Array(String)",
"getJobId": "Bool",
"benchmarks": "Array(String)",
"granularity": "String",
"names": "Array(String)",
"models": "Array(String)",
"repo": "String",
"startTime": "DateTime64(3)",
"stopTime": "DateTime64(3)"
Expand Down
119 changes: 73 additions & 46 deletions torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql
Original file line number Diff line number Diff line change
@@ -1,66 +1,93 @@
--- This query is used to get the LLMs benchmark results from different experiments. It
--- queries the TPS and memory bandwidth for each model / quantization combos. This powers
--- the LLMs benchmark dashboard
WITH benchmarks AS (
--- Pre-filter oss_ci_benchmark_v3 records for the requested repo, time window
--- and commits, and flatten the nested benchmark / model / metric fields into
--- the plain columns used by the outer query.
SELECT
--- Strip the 'refs/heads/' prefix so the column holds the bare branch name.
replaceOne(o.head_branch, 'refs/heads/', '') AS head_branch,
o.workflow_id AS workflow_id,
o.job_id AS job_id,
o.model.name AS model,
o.model.backend AS backend,
o.metric.name AS metric,
--- actual is the average of the recorded benchmark values; both actual and
--- target are rounded down to 2 decimal places.
floor(arrayAvg(o.metric.benchmark_values), 2) AS actual,
floor(toFloat64(o.metric.target_value), 2) AS target,
o.benchmark.dtype AS dtype,
--- Device: when the record has no runners, fall back to the 'device' entry
--- of benchmark.extra_info; otherwise use the name of the runner at index 1
--- (the first element, as ClickHouse arrays are 1-based).
IF(
empty(o.runners),
tupleElement(o.benchmark, 'extra_info') [ 'device' ],
tupleElement(o.runners [ 1 ], 'name')
) AS device,
--- Arch: same fallback rule as device, using extra_info['arch'] or the
--- 'type' of the first runner.
IF(
empty(o.runners),
tupleElement(o.benchmark, 'extra_info') [ 'arch' ],
tupleElement(o.runners [ 1 ], 'type')
) AS arch,
--- Truncate the record's Unix timestamp to the caller-supplied granularity.
DATE_TRUNC(
{granularity: String },
fromUnixTimestamp(o.timestamp)
) AS granularity_bucket
FROM
benchmark.oss_ci_benchmark_v3 o
WHERE
--- Half-open time window: startTime inclusive, stopTime exclusive.
o.timestamp >= toUnixTimestamp({startTime: DateTime64(3) })
AND o.timestamp < toUnixTimestamp({stopTime: DateTime64(3) })
AND o.repo = {repo: String }
--- For each array parameter below, an empty array means "do not filter".
AND (
has({commits: Array(String) }, o.head_sha)
OR empty({commits: Array(String) })
)
AND (
has({benchmarks: Array(String) }, o.benchmark.name)
OR empty({benchmarks: Array(String) })
)
--- NOTE(review): this matches {models} against the bare model name, while
--- the outer query matches the same parameter against
--- CONCAT(model, ' ', backend). Confirm callers pass values that can satisfy
--- both filters when backend is non-empty.
AND (
has({models: Array(String) }, o.model.name)
OR empty({models: Array(String) })
)
AND (
has({dtypes: Array(String) }, o.benchmark.dtype)
OR empty({dtypes: Array(String) })
)
--- Drop records whose metric name is listed in excludedMetrics; an empty
--- list excludes nothing.
AND (
NOT has({excludedMetrics: Array(String) }, o.metric.name)
OR empty({excludedMetrics: Array(String) })
)
--- Skip records with no metric name or no dtype.
AND notEmpty(o.metric.name)
AND notEmpty(o.benchmark.dtype)
)
SELECT
DISTINCT o.workflow_id AS workflow_id,
-- As the JSON response is pretty big, only return the field if it's needed
IF({getJobId: Bool}, o.job_id, '') AS job_id,
o.name,
o.metric,
floor(toFloat64(o.actual), 2) AS actual,
floor(toFloat64(o.target), 2) AS target,
DATE_TRUNC(
{granularity: String },
fromUnixTimestamp64Milli(o.timestamp)
) AS granularity_bucket,
o.dtype,
o.device,
-- NB: Default to NVIDIA A100-SXM4-40GB for old records without arch column
IF(empty(o.arch), 'NVIDIA A100-SXM4-40GB', o.arch) as arch
DISTINCT workflow_id,
job_id,
CONCAT(model, ' ', backend) AS name,
metric,
actual,
target,
dtype,
device,
arch,
granularity_bucket
FROM
benchmark.oss_ci_benchmark_v2 o
LEFT JOIN default .workflow_run w FINAL ON o.workflow_id = w.id
benchmarks
WHERE
o.timestamp >= toUnixTimestamp64Milli({startTime: DateTime64(3) })
AND o.timestamp < toUnixTimestamp64Milli({stopTime: DateTime64(3) })
AND (
has({branches: Array(String) }, w.head_branch)
OR empty({branches: Array(String) })
)
AND (
has({commits: Array(String) }, w.head_sha)
OR empty({commits: Array(String) })
)
AND (
has({filenames: Array(String) }, o.filename)
OR empty({filenames: Array(String) })
(
has({models: Array(String) }, CONCAT(model, ' ', backend))
OR empty({models: Array(String) })
)
AND (
has({names: Array(String) }, o.name)
OR empty({names: Array(String) })
has({branches: Array(String) }, head_branch)
OR empty({branches: Array(String) })
)
-- NB: DEVICE (ARCH) is the display format used by HUD when grouping together these two fields
AND (
CONCAT(
o.device,
device,
' (',
IF(empty(o.arch), 'NVIDIA A100-SXM4-40GB', o.arch),
IF(empty(arch), 'NVIDIA A100-SXM4-40GB', arch),
')'
) = {deviceArch: String }
OR {deviceArch: String } = ''
)
AND (
has({dtypes: Array(String) }, o.dtype)
OR empty({dtypes: Array(String) })
)
AND (
NOT has({excludedMetrics: Array(String) }, o.metric)
OR empty({excludedMetrics: Array(String) })
)
AND notEmpty(o.metric)
AND notEmpty(o.dtype)
AND notEmpty(o.device)
AND w.html_url LIKE CONCAT('%', {repo: String }, '%')
AND notEmpty(device)
ORDER BY
granularity_bucket DESC,
workflow_id DESC,
Expand Down
4 changes: 2 additions & 2 deletions torchci/clickhouse_queries/oss_ci_benchmark_names/params.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
"deviceArch": "String",
"dtypes": "Array(String)",
"excludedMetrics": "Array(String)",
"filenames": "Array(String)",
"names": "Array(String)",
"benchmarks": "Array(String)",
"models": "Array(String)",
"repo": "String",
"startTime": "DateTime64(3)",
"stopTime": "DateTime64(3)"
Expand Down
97 changes: 58 additions & 39 deletions torchci/clickhouse_queries/oss_ci_benchmark_names/query.sql
Original file line number Diff line number Diff line change
@@ -1,51 +1,70 @@
--- This query is used by HUD benchmarks dashboards to get the list of experiment names
WITH benchmarks AS (
--- Pre-filter oss_ci_benchmark_v3 records for the requested repo and time
--- window, and flatten the nested benchmark / model / metric fields into the
--- plain columns used by the outer query.
SELECT
o.benchmark.name AS benchmark,
o.model.name AS model,
o.model.backend AS backend,
o.metric.name AS metric,
o.benchmark.dtype AS dtype,
--- Device: when the record has no runners, fall back to the 'device' entry
--- of benchmark.extra_info; otherwise use the name of the runner at index 1
--- (the first element, as ClickHouse arrays are 1-based).
IF(
empty(o.runners),
tupleElement(o.benchmark, 'extra_info') [ 'device' ],
tupleElement(o.runners [ 1 ], 'name')
) AS device,
--- Arch: same fallback rule as device, using extra_info['arch'] or the
--- 'type' of the first runner.
IF(
empty(o.runners),
tupleElement(o.benchmark, 'extra_info') [ 'arch' ],
tupleElement(o.runners [ 1 ], 'type')
) AS arch
FROM
benchmark.oss_ci_benchmark_v3 o
WHERE
--- Half-open time window: startTime inclusive, stopTime exclusive.
o.timestamp >= toUnixTimestamp({startTime: DateTime64(3) })
AND o.timestamp < toUnixTimestamp({stopTime: DateTime64(3) })
AND o.repo = {repo: String }
--- For each array parameter below, an empty array means "do not filter".
AND (
has({benchmarks: Array(String) }, o.benchmark.name)
OR empty({benchmarks: Array(String) })
)
--- NOTE(review): this matches {models} against the bare model name, while
--- the outer SELECT exposes CONCAT(model, ' ', backend) AS name. Confirm
--- which form callers pass in {models}.
AND (
has({models: Array(String) }, o.model.name)
OR empty({models: Array(String) })
)
AND (
has({dtypes: Array(String) }, o.benchmark.dtype)
OR empty({dtypes: Array(String) })
)
--- Drop records whose metric name is listed in excludedMetrics; an empty
--- list excludes nothing.
AND (
NOT has({excludedMetrics: Array(String) }, o.metric.name)
OR empty({excludedMetrics: Array(String) })
)
--- Skip records with no metric name or no dtype.
AND notEmpty(o.metric.name)
AND notEmpty(o.benchmark.dtype)
)
SELECT
DISTINCT o.filename AS filename,
o.name,
o.metric,
o.dtype,
o.device,
-- NB: Default to NVIDIA A100-SXM4-40GB for old records without arch column
IF(empty(o.arch), 'NVIDIA A100-SXM4-40GB', o.arch) AS arch
DISTINCT benchmark,
CONCAT(model, ' ', backend) AS name,
metric,
dtype,
device,
arch
FROM
benchmark.oss_ci_benchmark_v2 o
LEFT JOIN default .workflow_run w FINAL ON o.workflow_id = w.id
benchmarks
WHERE
o.timestamp >= toUnixTimestamp64Milli({startTime: DateTime64(3) })
AND o.timestamp < toUnixTimestamp64Milli({stopTime: DateTime64(3) })
AND (
has({filenames: Array(String) }, o.filename)
OR empty({filenames: Array(String) })
)
AND (
has({names: Array(String) }, o.name)
OR empty({names: Array(String) })
)
-- NB: DEVICE (ARCH) is the display format used by HUD when grouping together these two fields
AND (
(
CONCAT(
o.device,
device,
' (',
IF(empty(o.arch), 'NVIDIA A100-SXM4-40GB', o.arch),
IF(empty(arch), 'NVIDIA A100-SXM4-40GB', arch),
')'
) = {deviceArch: String }
OR {deviceArch: String } = ''
)
AND (
has({dtypes: Array(String) }, o.dtype)
OR empty({dtypes: Array(String) })
)
AND (
NOT has({excludedMetrics: Array(String) }, o.metric)
OR empty({excludedMetrics: Array(String) })
)
AND notEmpty(o.metric)
AND w.html_url LIKE CONCAT('%', {repo: String }, '%')
AND notEmpty(o.dtype)
AND notEmpty(o.device)
AND notEmpty(device)
ORDER BY
o.filename,
o.name,
o.metric,
o.dtype,
o.device
benchmark,
name,
metric,
dtype,
device
4 changes: 2 additions & 2 deletions torchci/components/benchmark/llms/common.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ import { BranchAndCommit } from "lib/types";

export const REPOS = ["pytorch/pytorch", "pytorch/executorch"];
export const REPO_TO_BENCHMARKS: { [k: string]: string[] } = {
"pytorch/pytorch": ["gpt_fast_benchmark"],
"pytorch/executorch": ["android-perf", "apple-perf"],
"pytorch/pytorch": ["PyTorch gpt-fast benchmark"],
"pytorch/executorch": ["ExecuTorch"],
};
export const EXCLUDED_METRICS: string[] = ["load_status"];
export const DEFAULT_MODEL_NAME = "All Models";
Expand Down
Loading

0 comments on commit 28ccdef

Please sign in to comment.