diff --git a/torchci/clickhouse_queries/oss_ci_benchmark_branches/params.json b/torchci/clickhouse_queries/oss_ci_benchmark_branches/params.json
index 877c4606db..f54711f6a6 100644
--- a/torchci/clickhouse_queries/oss_ci_benchmark_branches/params.json
+++ b/torchci/clickhouse_queries/oss_ci_benchmark_branches/params.json
@@ -2,8 +2,8 @@
   "deviceArch": "String",
   "dtypes": "Array(String)",
   "excludedMetrics": "Array(String)",
-  "filenames": "Array(String)",
-  "names": "Array(String)",
+  "benchmarks": "Array(String)",
+  "models": "Array(String)",
   "repo": "String",
   "startTime": "DateTime64(3)",
   "stopTime": "DateTime64(3)"
diff --git a/torchci/clickhouse_queries/oss_ci_benchmark_branches/query.sql b/torchci/clickhouse_queries/oss_ci_benchmark_branches/query.sql
index b9caf852d2..fffd8c9716 100644
--- a/torchci/clickhouse_queries/oss_ci_benchmark_branches/query.sql
+++ b/torchci/clickhouse_queries/oss_ci_benchmark_branches/query.sql
@@ -1,47 +1,65 @@
 -- This query is used to get the list of branches and commits used by different
 -- OSS CI benchmark experiments. This powers HUD benchmarks dashboards
+WITH benchmarks AS ( -- v3 benchmark rows narrowed by time window, repo and picker filters
+    SELECT
+        o.head_branch AS head_branch,
+        o.head_sha AS head_sha,
+        o.workflow_id AS id,
+        IF(
+            empty(o.runners),
+            tupleElement(o.benchmark, 'extra_info') [ 'device' ],
+            tupleElement(o.runners [ 1 ], 'name')
+        ) AS device, -- name of the first runner, or the extra_info device when runners is empty
+        IF(
+            empty(o.runners),
+            tupleElement(o.benchmark, 'extra_info') [ 'arch' ],
+            tupleElement(o.runners [ 1 ], 'type')
+        ) AS arch, -- type of the first runner, or the extra_info arch when runners is empty
+        toStartOfDay(fromUnixTimestamp(o.timestamp)) AS event_time
+    FROM
+        benchmark.oss_ci_benchmark_v3 o
+    WHERE
+        o.timestamp >= toUnixTimestamp({startTime: DateTime64(3) })
+        AND o.timestamp < toUnixTimestamp({stopTime: DateTime64(3) })
+        AND o.repo = {repo: String }
+        AND (
+            has({benchmarks: Array(String) }, o.benchmark.name)
+            OR empty({benchmarks: Array(String) })
+        )
+        AND ( -- models holds model + space + backend names, the format oss_ci_benchmark_names returns
+            has({models: Array(String) }, CONCAT(o.model.name, ' ', o.model.backend))
+            OR empty({models: Array(String) })
+        )
+        AND (
+            has({dtypes: Array(String) }, o.benchmark.dtype)
+            OR empty({dtypes: Array(String) })
+        )
+        AND (
+            NOT has({excludedMetrics: Array(String) }, o.metric.name)
+            OR empty({excludedMetrics: Array(String) })
+        )
+        AND notEmpty(o.metric.name)
+        AND notEmpty(o.benchmark.dtype)
+)
 SELECT
-    DISTINCT w.head_branch AS head_branch,
-    w.head_sha,
-    w.id,
-    toStartOfDay(fromUnixTimestamp64Milli(o.timestamp)) AS event_time,
-    o.filename
+    DISTINCT replaceOne(head_branch, 'refs/heads/', '') AS head_branch,
+    head_sha,
+    id,
+    event_time
 FROM
-    benchmark.oss_ci_benchmark_v2 o
-    LEFT JOIN default .workflow_run w FINAL ON o.workflow_id = w.id
+    benchmarks
 WHERE
-    o.timestamp >= toUnixTimestamp64Milli({startTime: DateTime64(3) })
-    AND o.timestamp < toUnixTimestamp64Milli({stopTime: DateTime64(3) })
-    AND (
-        has({filenames: Array(String) }, o.filename)
-        OR empty({filenames: Array(String) })
-    )
-    AND (
-        has({names: Array(String) }, o.name)
-        OR empty({names: Array(String) })
-    )
     -- NB: DEVICE (ARCH) is the display format used by HUD when grouping together these two fields
-    AND (
+    (
         CONCAT(
-            o.device,
+            device,
             ' (',
-            IF(empty(o.arch), 'NVIDIA A100-SXM4-40GB', o.arch),
+            IF(empty(arch), 'NVIDIA A100-SXM4-40GB', arch),
             ')'
         ) = {deviceArch: String }
         OR {deviceArch: String } = ''
     )
-    AND (
-        has({dtypes: Array(String) }, o.dtype)
-        OR empty({dtypes: Array(String) })
-    )
-    AND (
-        NOT has({excludedMetrics: Array(String) }, o.metric)
-        OR empty({excludedMetrics: Array(String) })
-    )
-    AND notEmpty(o.metric)
-    AND w.html_url LIKE CONCAT('%', {repo: String }, '%')
-    AND notEmpty(o.dtype)
-    AND notEmpty(o.device)
+    AND notEmpty(device)
 ORDER BY
-    w.head_branch,
+    head_branch,
     event_time DESC
diff --git a/torchci/clickhouse_queries/oss_ci_benchmark_llms/params.json b/torchci/clickhouse_queries/oss_ci_benchmark_llms/params.json
index 9ecd4e5ad9..d0099c2733 100644
--- a/torchci/clickhouse_queries/oss_ci_benchmark_llms/params.json
+++ b/torchci/clickhouse_queries/oss_ci_benchmark_llms/params.json
@@ -4,10 +4,9 @@
   "deviceArch": "String",
   "dtypes": "Array(String)",
   "excludedMetrics": "Array(String)",
-  "filenames": "Array(String)",
-  "getJobId": "Bool",
+  "benchmarks": "Array(String)",
   "granularity": "String",
-  "names": "Array(String)",
+  "models": "Array(String)",
   "repo": "String",
   "startTime": "DateTime64(3)",
   "stopTime": "DateTime64(3)"
diff --git a/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql b/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql
index a9b77ce6b6..31d34063ea 100644
--- a/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql
+++ b/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql
@@ -1,66 +1,93 @@
 --- This query is used to get the LLMs benchmark results from different experiments. It
 --- queries the TPS and memory bandwidth for each model / quantization combos. This powers
 --- the LLMs benchmark dashboard
+WITH benchmarks AS ( -- v3 benchmark rows narrowed by time window, repo, commits and picker filters
+    SELECT
+        replaceOne(o.head_branch, 'refs/heads/', '') AS head_branch,
+        o.workflow_id AS workflow_id,
+        o.job_id AS job_id,
+        o.model.name AS model,
+        o.model.backend AS backend,
+        o.metric.name AS metric,
+        floor(arrayAvg(o.metric.benchmark_values), 2) AS actual,
+        floor(toFloat64(o.metric.target_value), 2) AS target,
+        o.benchmark.dtype AS dtype,
+        IF(
+            empty(o.runners),
+            tupleElement(o.benchmark, 'extra_info') [ 'device' ],
+            tupleElement(o.runners [ 1 ], 'name')
+        ) AS device, -- name of the first runner, or the extra_info device when runners is empty
+        IF(
+            empty(o.runners),
+            tupleElement(o.benchmark, 'extra_info') [ 'arch' ],
+            tupleElement(o.runners [ 1 ], 'type')
+        ) AS arch, -- type of the first runner, or the extra_info arch when runners is empty
+        DATE_TRUNC(
+            {granularity: String },
+            fromUnixTimestamp(o.timestamp)
+        ) AS granularity_bucket
+    FROM
+        benchmark.oss_ci_benchmark_v3 o
+    WHERE
+        o.timestamp >= toUnixTimestamp({startTime: DateTime64(3) })
+        AND o.timestamp < toUnixTimestamp({stopTime: DateTime64(3) })
+        AND o.repo = {repo: String }
+        AND (
+            has({commits: Array(String) }, o.head_sha)
+            OR empty({commits: Array(String) })
+        )
+        AND (
+            has({benchmarks: Array(String) }, o.benchmark.name)
+            OR empty({benchmarks: Array(String) })
+        )
+        AND ( -- must use the same model + space + backend name as the outer models filter, else no row can pass both
+            has({models: Array(String) }, CONCAT(o.model.name, ' ', o.model.backend))
+            OR empty({models: Array(String) })
+        )
+        AND (
+            has({dtypes: Array(String) }, o.benchmark.dtype)
+            OR empty({dtypes: Array(String) })
+        )
+        AND (
+            NOT has({excludedMetrics: Array(String) }, o.metric.name)
+            OR empty({excludedMetrics: Array(String) })
+        )
+        AND notEmpty(o.metric.name)
+        AND notEmpty(o.benchmark.dtype)
+)
 SELECT
-    DISTINCT o.workflow_id AS workflow_id,
-    -- As the JSON response is pretty big, only return the field if it's needed
-    IF({getJobId: Bool}, o.job_id, '') AS job_id,
-    o.name,
-    o.metric,
-    floor(toFloat64(o.actual), 2) AS actual,
-    floor(toFloat64(o.target), 2) AS target,
-    DATE_TRUNC(
-        {granularity: String },
-        fromUnixTimestamp64Milli(o.timestamp)
-    ) AS granularity_bucket,
-    o.dtype,
-    o.device,
-    -- NB: Default to NVIDIA A100-SXM4-40GB for old records without arch column
-    IF(empty(o.arch), 'NVIDIA A100-SXM4-40GB', o.arch) as arch
+    DISTINCT workflow_id,
+    job_id,
+    CONCAT(model, ' ', backend) AS name,
+    metric,
+    actual,
+    target,
+    dtype,
+    device,
+    arch,
+    granularity_bucket
 FROM
-    benchmark.oss_ci_benchmark_v2 o
-    LEFT JOIN default .workflow_run w FINAL ON o.workflow_id = w.id
+    benchmarks
 WHERE
-    o.timestamp >= toUnixTimestamp64Milli({startTime: DateTime64(3) })
-    AND o.timestamp < toUnixTimestamp64Milli({stopTime: DateTime64(3) })
-    AND (
-        has({branches: Array(String) }, w.head_branch)
-        OR empty({branches: Array(String) })
-    )
-    AND (
-        has({commits: Array(String) }, w.head_sha)
-        OR empty({commits: Array(String) })
-    )
-    AND (
-        has({filenames: Array(String) }, o.filename)
-        OR empty({filenames: Array(String) })
+    (
+        has({models: Array(String) }, CONCAT(model, ' ', backend))
+        OR empty({models: Array(String) })
     )
     AND (
-        has({names: Array(String) }, o.name)
-        OR empty({names: Array(String) })
+        has({branches: Array(String) }, head_branch)
+        OR empty({branches: Array(String) })
     )
     -- NB: DEVICE (ARCH) is the display format used by HUD when grouping together these two fields
     AND (
         CONCAT(
-            o.device,
+            device,
             ' (',
-            IF(empty(o.arch), 'NVIDIA A100-SXM4-40GB', o.arch),
+            IF(empty(arch), 'NVIDIA A100-SXM4-40GB', arch),
             ')'
         ) = {deviceArch: String }
         OR {deviceArch: String } = ''
     )
-    AND (
-        has({dtypes: Array(String) }, o.dtype)
-        OR empty({dtypes: Array(String) })
-    )
-    AND (
-        NOT has({excludedMetrics: Array(String) }, o.metric)
-        OR empty({excludedMetrics: Array(String) })
-    )
-    AND notEmpty(o.metric)
-    AND notEmpty(o.dtype)
-    AND notEmpty(o.device)
-    AND w.html_url LIKE CONCAT('%', {repo: String }, '%')
+    AND notEmpty(device)
 ORDER BY
     granularity_bucket DESC,
     workflow_id DESC,
diff --git a/torchci/clickhouse_queries/oss_ci_benchmark_names/params.json b/torchci/clickhouse_queries/oss_ci_benchmark_names/params.json
index 877c4606db..f54711f6a6 100644
--- a/torchci/clickhouse_queries/oss_ci_benchmark_names/params.json
+++ b/torchci/clickhouse_queries/oss_ci_benchmark_names/params.json
@@ -2,8 +2,8 @@
   "deviceArch": "String",
   "dtypes": "Array(String)",
   "excludedMetrics": "Array(String)",
-  "filenames": "Array(String)",
-  "names": "Array(String)",
+  "benchmarks": "Array(String)",
+  "models": "Array(String)",
   "repo": "String",
   "startTime": "DateTime64(3)",
   "stopTime": "DateTime64(3)"
diff --git a/torchci/clickhouse_queries/oss_ci_benchmark_names/query.sql b/torchci/clickhouse_queries/oss_ci_benchmark_names/query.sql
index 18ffc97d43..6af360e113 100644
--- a/torchci/clickhouse_queries/oss_ci_benchmark_names/query.sql
+++ b/torchci/clickhouse_queries/oss_ci_benchmark_names/query.sql
@@ -1,51 +1,70 @@
 --- This query is used by HUD benchmarks dashboards to get the list of experiment names
+WITH benchmarks AS ( -- v3 benchmark rows narrowed by time window, repo and picker filters
+    SELECT
+        o.benchmark.name AS benchmark,
+        o.model.name AS model,
+        o.model.backend AS backend,
+        o.metric.name AS metric,
+        o.benchmark.dtype AS dtype,
+        IF(
+            empty(o.runners),
+            tupleElement(o.benchmark, 'extra_info') [ 'device' ],
+            tupleElement(o.runners [ 1 ], 'name')
+        ) AS device, -- name of the first runner, or the extra_info device when runners is empty
+        IF(
+            empty(o.runners),
+            tupleElement(o.benchmark, 'extra_info') [ 'arch' ],
+            tupleElement(o.runners [ 1 ], 'type')
+        ) AS arch -- type of the first runner, or the extra_info arch when runners is empty
+    FROM
+        benchmark.oss_ci_benchmark_v3 o
+    WHERE
+        o.timestamp >= toUnixTimestamp({startTime: DateTime64(3) })
+        AND o.timestamp < toUnixTimestamp({stopTime: DateTime64(3) })
+        AND o.repo = {repo: String }
+        AND (
+            has({benchmarks: Array(String) }, o.benchmark.name)
+            OR empty({benchmarks: Array(String) })
+        )
+        AND ( -- filter on the same model + space + backend value that is returned as the name column
+            has({models: Array(String) }, CONCAT(o.model.name, ' ', o.model.backend))
+            OR empty({models: Array(String) })
+        )
+        AND (
+            has({dtypes: Array(String) }, o.benchmark.dtype)
+            OR empty({dtypes: Array(String) })
+        )
+        AND (
+            NOT has({excludedMetrics: Array(String) }, o.metric.name)
+            OR empty({excludedMetrics: Array(String) })
+        )
+        AND notEmpty(o.metric.name)
+        AND notEmpty(o.benchmark.dtype)
+)
 SELECT
-    DISTINCT o.filename AS filename,
-    o.name,
-    o.metric,
-    o.dtype,
-    o.device,
-    -- NB: Default to NVIDIA A100-SXM4-40GB for old records without arch column
-    IF(empty(o.arch), 'NVIDIA A100-SXM4-40GB', o.arch) AS arch
+    DISTINCT benchmark,
+    CONCAT(model, ' ', backend) AS name,
+    metric,
+    dtype,
+    device,
+    arch
 FROM
-    benchmark.oss_ci_benchmark_v2 o
-    LEFT JOIN default .workflow_run w FINAL ON o.workflow_id = w.id
+    benchmarks
 WHERE
-    o.timestamp >= toUnixTimestamp64Milli({startTime: DateTime64(3) })
-    AND o.timestamp < toUnixTimestamp64Milli({stopTime: DateTime64(3) })
-    AND (
-        has({filenames: Array(String) }, o.filename)
-        OR empty({filenames: Array(String) })
-    )
-    AND (
-        has({names: Array(String) }, o.name)
-        OR empty({names: Array(String) })
-    )
     -- NB: DEVICE (ARCH) is the display format used by HUD when grouping together these two fields
-    AND (
+    (
         CONCAT(
-            o.device,
+            device,
             ' (',
-            IF(empty(o.arch), 'NVIDIA A100-SXM4-40GB', o.arch),
+            IF(empty(arch), 'NVIDIA A100-SXM4-40GB', arch),
             ')'
         ) = {deviceArch: String }
         OR {deviceArch: String } = ''
     )
-    AND (
-        has({dtypes: Array(String) }, o.dtype)
-        OR empty({dtypes: Array(String) })
-    )
-    AND (
-        NOT has({excludedMetrics: Array(String) }, o.metric)
-        OR empty({excludedMetrics: Array(String) })
-    )
-    AND notEmpty(o.metric)
-    AND w.html_url LIKE CONCAT('%', {repo: String }, '%')
-    AND notEmpty(o.dtype)
-    AND notEmpty(o.device)
+    AND notEmpty(device)
 ORDER BY
-    o.filename,
-    o.name,
-    o.metric,
-    o.dtype,
-    o.device
+    benchmark,
+    name,
+    metric,
+    dtype,
+    device
diff --git a/torchci/components/benchmark/llms/common.tsx b/torchci/components/benchmark/llms/common.tsx
index 2109a87619..790e09e983 100644
--- a/torchci/components/benchmark/llms/common.tsx
+++ b/torchci/components/benchmark/llms/common.tsx
@@ -2,8 +2,8 @@ import { BranchAndCommit } from "lib/types";
 
 export const REPOS = ["pytorch/pytorch", "pytorch/executorch"];
 export const REPO_TO_BENCHMARKS: { [k: string]: string[] } = {
-  "pytorch/pytorch": ["gpt_fast_benchmark"],
-  "pytorch/executorch": ["android-perf", "apple-perf"],
+  "pytorch/pytorch": ["PyTorch gpt-fast benchmark"],
+  "pytorch/executorch": ["ExecuTorch"],
 };
 export const EXCLUDED_METRICS: string[] = ["load_status"];
 export const DEFAULT_MODEL_NAME = "All Models";
diff --git a/torchci/lib/benchmark/llmUtils.ts b/torchci/lib/benchmark/llmUtils.ts
index fbdc4e22a0..464e6d9b51 100644
--- a/torchci/lib/benchmark/llmUtils.ts
+++ b/torchci/lib/benchmark/llmUtils.ts
@@ -17,14 +17,13 @@ export function useBenchmark(
   const queryCollection = "benchmarks";
   const queryName = "oss_ci_benchmark_llms";
 
-  const queryParamsWithBranchAndCommit: { [key: string]: any } = {
-    getJobId: getJobId,
-    ...queryParams,
-  };
-
+  // Work on a shallow copy so that setting branches/commits below does not
+  // mutate the queryParams object owned by the caller
+  const queryParamsWithBranchAndCommit: { [key: string]: any } = {
+    ...queryParams,
+  };
   (queryParamsWithBranchAndCommit as { [key: string]: any })["branches"] =
     branchAndCommit.branch ? [branchAndCommit.branch] : [];
-
   (queryParamsWithBranchAndCommit as { [key: string]: any })["commits"] =
     branchAndCommit.commit ? [branchAndCommit.commit] : [];
 
diff --git a/torchci/pages/benchmark/llms.tsx b/torchci/pages/benchmark/llms.tsx
index 7dab5724d1..07b0e5aae3 100644
--- a/torchci/pages/benchmark/llms.tsx
+++ b/torchci/pages/benchmark/llms.tsx
@@ -235,9 +235,9 @@ export default function Page() {
     deviceArch: deviceName === DEFAULT_DEVICE_NAME ? "" : deviceName,
     dtypes: dtypeName === DEFAULT_DTYPE_NAME ? [] : [dtypeName],
     excludedMetrics: EXCLUDED_METRICS,
-    filenames: REPO_TO_BENCHMARKS[repoName],
+    benchmarks: REPO_TO_BENCHMARKS[repoName],
     granularity: granularity,
-    names: modelName === DEFAULT_MODEL_NAME ? [] : [modelName],
+    models: modelName === DEFAULT_MODEL_NAME ? [] : [modelName],
     repo: repoName,
     startTime: dayjs(startTime).utc().format("YYYY-MM-DDTHH:mm:ss.SSS"),
     stopTime: dayjs(stopTime).utc().format("YYYY-MM-DDTHH:mm:ss.SSS"),