From 1b23394b6a09a7195e62a21d3531bf7e09563ff0 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Mon, 6 Jan 2025 13:44:18 -0800
Subject: [PATCH] Group speed up geomean by model groups (#6145)

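Compute the speedup geomean separately for each group of model origins by
adding `origins` to the grouping key, and prefix the origins to each series'
display name. This makes the chart legend a bit long, but I guess it's ok for
now.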

### Testing


https://torchci-git-fork-huydhn-add-model-origins-fbopensource.vercel.app/benchmark/llms?startTime=Mon%2C%2030%20Dec%202024%2021%3A17%3A13%20GMT&stopTime=Mon%2C%2006%20Jan%202025%2021%3A17%3A13%20GMT&granularity=day&lBranch=main&lCommit=dd82365333c66bc5f75e44411005fe3480fb9c3a4a6c702cff4247f5920ede5a&rBranch=main&rCommit=2ace43552f163b259a737cb0fa93bbe2bafb16d14baa904635010fdd0d0e260c&repoName=pytorch%2Fao&modelName=All%20Models&backendName=All%20Backends&dtypeName=All%20DType&deviceName=All%20Devices

@jerryzh168 Do you want to change `torchao/_models` to just `torchao`?
I think it would help shorten the long chart legend string.
---
 .../oss_ci_benchmark_llms/query.sql             |  2 ++
 .../benchmark/llms/ModelGraphPanel.tsx          |  6 +++++-
 torchci/components/benchmark/llms/common.tsx    |  1 +
 torchci/lib/benchmark/llmUtils.ts               | 17 ++++++++++++++---
 4 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql b/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql
index 7a48fa7b5e..d393e48141 100644
--- a/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql
+++ b/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql
@@ -8,6 +8,7 @@ WITH benchmarks AS (
         o.job_id AS job_id,
         o.model.name AS model,
         o.model.backend AS backend,
+        o.model.origins AS origins,
         o.metric.name AS metric,
         floor(arrayAvg(o.metric.benchmark_values), 2) AS actual,
         floor(toFloat64(o.metric.target_value), 2) AS target,
@@ -63,6 +64,7 @@ SELECT
     job_id,
     model,
     backend,
+    origins,
     metric,
     actual,
     target,
diff --git a/torchci/components/benchmark/llms/ModelGraphPanel.tsx b/torchci/components/benchmark/llms/ModelGraphPanel.tsx
index e62c4d14ce..771207f2c3 100644
--- a/torchci/components/benchmark/llms/ModelGraphPanel.tsx
+++ b/torchci/components/benchmark/llms/ModelGraphPanel.tsx
@@ -104,7 +104,11 @@ export function GraphPanel({
               );
             })
             .map((record: LLMsBenchmarkData) => {
-              record.display = `${record.dtype} @ ${record.device} (${record.arch})`;
+              const origins =
+                record.origins.length !== 0
+                  ? `${record.origins.join(",")} `
+                  : "";
+              record.display = `${origins}${record.dtype} @ ${record.device} (${record.arch})`;
               return record;
             })
         : dataWithSpeedup
diff --git a/torchci/components/benchmark/llms/common.tsx b/torchci/components/benchmark/llms/common.tsx
index 9efd5eaea0..bf76204015 100644
--- a/torchci/components/benchmark/llms/common.tsx
+++ b/torchci/components/benchmark/llms/common.tsx
@@ -43,6 +43,7 @@ export interface LLMsBenchmarkData {
   granularity_bucket: string;
   model: string;
   backend: string;
+  origins: string[];
   workflow_id: number;
   job_id: number;
   metric: string;
diff --git a/torchci/lib/benchmark/llmUtils.ts b/torchci/lib/benchmark/llmUtils.ts
index 98ca36598b..d483aaa4fa 100644
--- a/torchci/lib/benchmark/llmUtils.ts
+++ b/torchci/lib/benchmark/llmUtils.ts
@@ -169,7 +169,8 @@ export function computeGeomean(data: LLMsBenchmarkData[], metricName: string) {
       return;
     }
 
-    const k = `${r.granularity_bucket}+${r.workflow_id}+${r.job_id}+${r.backend}+${r.dtype}+${r.device}+${r.arch}+${r.metric}`;
+    const origins = r.origins.join(",");
+    const k = `${r.granularity_bucket}+${r.workflow_id}+${r.job_id}+${r.backend}+${r.dtype}+${origins}+${r.device}+${r.arch}+${r.metric}`;
     if (!(k in metricValues)) {
       metricValues[k] = [];
     }
@@ -182,12 +183,22 @@ export function computeGeomean(data: LLMsBenchmarkData[], metricName: string) {
   Object.keys(metricValues).forEach((k: string) => {
     const gm = geomean(metricValues[k]);
 
-    const [bucket, workflowId, jobId, backend, dtype, device, arch, metric] =
-      k.split("+");
+    const [
+      bucket,
+      workflowId,
+      jobId,
+      backend,
+      dtype,
+      origins,
+      device,
+      arch,
+      metric,
+    ] = k.split("+");
     returnedGeomean.push({
       granularity_bucket: bucket,
       model: "",
       backend: backend,
+      origins: origins.split(","),
       workflow_id: Number(workflowId),
       job_id: Number(jobId),
       metric: `${metric} (geomean)`,