Skip to content

Commit 727fb8e

Browse files
author
pytorchbot
committed
2024-12-03 nightly release (4f47cc9)
1 parent e2a771d commit 727fb8e

File tree

44 files changed

+1878
-291
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+1878
-291
lines changed

.github/scripts/extract_benchmark_results.py

Lines changed: 104 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,7 @@ def transform(
310310
workflow_run_attempt: int,
311311
job_name: str,
312312
job_id: int,
313+
schema_version: str,
313314
) -> List:
314315
"""
315316
Transform the benchmark results into the format writable into the benchmark database
@@ -319,45 +320,91 @@ def transform(
319320
for r in benchmark_results:
320321
r["deviceInfo"]["device"] = job_name
321322

322-
# TODO (huydhn): This is the current schema of the database oss_ci_benchmark_v2,
323-
# and I'm trying to fit ET benchmark results into it, which is kind of awkward.
324-
# However, the schema is going to be updated soon
325-
return [
326-
{
327-
# GH-info to identify where the benchmark is run
328-
"repo": repo,
329-
"head_branch": head_branch,
330-
"workflow_id": workflow_run_id,
331-
"run_attempt": workflow_run_attempt,
332-
"job_id": job_id,
333-
# The model
334-
"name": f"{r['benchmarkModel']['name']} {r['benchmarkModel'].get('backend', '')}".strip(),
335-
"dtype": (
336-
r["benchmarkModel"]["quantization"]
337-
if r["benchmarkModel"]["quantization"]
338-
else "unknown"
339-
),
340-
# The metric value
341-
"metric": r["metric"],
342-
"actual": r["actualValue"],
343-
"target": r["targetValue"],
344-
# The device
345-
"device": r["deviceInfo"]["device"],
346-
"arch": r["deviceInfo"].get("os", ""),
347-
# Not used here, just set it to something unique here
348-
"filename": workflow_name,
349-
"test_name": app_type,
350-
"runner": job_name,
351-
}
352-
for r in benchmark_results
353-
]
323+
if schema_version == "v2":
324+
# TODO (huydhn): Clean up this branch after ExecuTorch dashboard migrates to v3
325+
return [
326+
{
327+
# GH-info to identify where the benchmark is run
328+
"repo": repo,
329+
"head_branch": head_branch,
330+
"workflow_id": workflow_run_id,
331+
"run_attempt": workflow_run_attempt,
332+
"job_id": job_id,
333+
# The model
334+
"name": f"{r['benchmarkModel']['name']} {r['benchmarkModel'].get('backend', '')}".strip(),
335+
"dtype": (
336+
r["benchmarkModel"]["quantization"]
337+
if r["benchmarkModel"]["quantization"]
338+
else "unknown"
339+
),
340+
# The metric value
341+
"metric": r["metric"],
342+
"actual": r["actualValue"],
343+
"target": r["targetValue"],
344+
# The device
345+
"device": r["deviceInfo"]["device"],
346+
"arch": r["deviceInfo"].get("os", ""),
347+
# Not used here, just set it to something unique here
348+
"filename": workflow_name,
349+
"test_name": app_type,
350+
"runner": job_name,
351+
}
352+
for r in benchmark_results
353+
]
354+
elif schema_version == "v3":
355+
quantization = (
356+
r["benchmarkModel"]["quantization"]
357+
if r["benchmarkModel"]["quantization"]
358+
else "unknown"
359+
)
360+
# From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
361+
return [
362+
{
363+
"benchmark": {
364+
"name": "ExecuTorch",
365+
"mode": "inference",
366+
"dtype": quantization,
367+
"extra_info": {
368+
"app_type": app_type,
369+
},
370+
},
371+
"model": {
372+
"name": r["benchmarkModel"]["name"],
373+
"type": "OSS model",
374+
"backend": r["benchmarkModel"].get("backend", ""),
375+
"extra_info": {
376+
"quantization": quantization,
377+
},
378+
},
379+
"metric": {
380+
"name": r["metric"],
381+
"benchmark_values": [r["actualValue"]],
382+
"target_value": r["targetValue"],
383+
"extra_info": {
384+
"method": r.get("method", ""),
385+
},
386+
},
387+
"runners": [
388+
{
389+
"name": r["deviceInfo"]["device"],
390+
"type": r["deviceInfo"]["os"],
391+
"avail_mem_in_gb": r["deviceInfo"].get("availMem", ""),
392+
"total_mem_in_gb": r["deviceInfo"].get("totalMem", ""),
393+
}
394+
],
395+
}
396+
for r in benchmark_results
397+
]
354398

355399

356400
def main() -> None:
357401
args = parse_args()
358402

359-
# Across all devices
360-
all_benchmark_results = []
403+
# Across all devices, keeping both schemas for now until ExecuTorch dashboard migrates to v3
404+
all_benchmark_results = {
405+
"v2": [],
406+
"v3": [],
407+
}
361408

362409
with open(args.artifacts) as f:
363410
for artifact in json.load(f):
@@ -384,23 +431,31 @@ def main() -> None:
384431
)
385432

386433
if benchmark_results:
387-
benchmark_results = transform(
388-
app_type,
389-
benchmark_results,
390-
args.repo,
391-
args.head_branch,
392-
args.workflow_name,
393-
args.workflow_run_id,
394-
args.workflow_run_attempt,
395-
job_name,
396-
extract_job_id(args.artifacts),
397-
)
398-
all_benchmark_results.extend(benchmark_results)
434+
for schema in all_benchmark_results.keys():
435+
results = transform(
436+
app_type,
437+
benchmark_results,
438+
args.repo,
439+
args.head_branch,
440+
args.workflow_name,
441+
args.workflow_run_id,
442+
args.workflow_run_attempt,
443+
job_name,
444+
extract_job_id(args.artifacts),
445+
schema,
446+
)
447+
all_benchmark_results[schema].extend(results)
448+
449+
for schema in all_benchmark_results.keys():
450+
if not all_benchmark_results.get(schema):
451+
continue
452+
453+
output_dir = os.path.join(args.output_dir, schema)
454+
os.mkdir(output_dir)
399455

400-
if all_benchmark_results:
401456
output_file = os.path.basename(args.artifacts)
402-
with open(f"{args.output_dir}/{output_file}", "w") as f:
403-
json.dump(all_benchmark_results, f)
457+
with open(f"{output_dir}/{output_file}", "w") as f:
458+
json.dump(all_benchmark_results[schema], f)
404459

405460

406461
if __name__ == "__main__":

.github/workflows/android-perf.yml

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -298,15 +298,25 @@ jobs:
298298
--workflow-run-attempt ${{ github.run_attempt }}
299299
done
300300
301-
ls -lah benchmark-results
302-
303-
for BENCHMARK_RESULTS in benchmark-results/*.json; do
304-
cat "${BENCHMARK_RESULTS}"
305-
echo
301+
for SCHEMA in v2 v3; do
302+
for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do
303+
cat "${BENCHMARK_RESULTS}"
304+
echo
305+
done
306306
done
307307
308-
- name: Upload the benchmark results
308+
# TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration
309+
- name: Upload the benchmark results (v2)
310+
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
311+
with:
312+
benchmark-results-dir: benchmark-results/v2
313+
dry-run: false
314+
schema-version: v2
315+
316+
- name: Upload the benchmark results (v3)
309317
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
310318
with:
311-
benchmark-results-dir: 'benchmark-results'
319+
benchmark-results-dir: benchmark-results/v3
312320
dry-run: false
321+
schema-version: v3
322+
github-token: ${{ secrets.GITHUB_TOKEN }}

.github/workflows/apple-perf.yml

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -372,15 +372,25 @@ jobs:
372372
--workflow-run-attempt ${{ github.run_attempt }}
373373
done
374374
375-
ls -lah benchmark-results
376-
377-
for BENCHMARK_RESULTS in benchmark-results/*.json; do
378-
cat "${BENCHMARK_RESULTS}"
379-
echo
375+
for SCHEMA in v2 v3; do
376+
for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do
377+
cat "${BENCHMARK_RESULTS}"
378+
echo
379+
done
380380
done
381381
382-
- name: Upload the benchmark results
382+
# TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration
383+
- name: Upload the benchmark results (v2)
384+
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
385+
with:
386+
benchmark-results-dir: benchmark-results/v2
387+
dry-run: false
388+
schema-version: v2
389+
390+
- name: Upload the benchmark results (v3)
383391
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
384392
with:
385-
benchmark-results-dir: 'benchmark-results'
393+
benchmark-results-dir: benchmark-results/v3
386394
dry-run: false
395+
schema-version: v3
396+
github-token: ${{ secrets.GITHUB_TOKEN }}

CMakeLists.txt

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,21 @@ if(NOT CMAKE_BUILD_TYPE)
5656
set(CMAKE_BUILD_TYPE Debug)
5757
endif()
5858

59+
# Setup RPATH.
60+
# See https://gitlab.kitware.com/cmake/community/-/wikis/doc/cmake/RPATH-handling
61+
# Use separate rpaths during build and install phases
62+
set(CMAKE_SKIP_BUILD_RPATH OFF)
63+
# Use the install-rpath during the build phase as well (CMAKE_BUILD_WITH_INSTALL_RPATH is ON)
64+
set(CMAKE_BUILD_WITH_INSTALL_RPATH ON)
65+
# Automatically add all linked folders that are NOT in the build directory to
66+
# the rpath (per library?)
67+
# TODO: Doesn't work for us right now because we are not installing .so's into the
68+
# correct locations. For example we have libcustom_ops_aot_lib.so depending on
69+
# _portable_lib.so, which was eventually put under <site-packages>/executorch/extension/pybindings/
70+
# but this rpath is not automatically added because at build time it seems `portable_lib`
71+
# is being built under the same directory, so no extra rpath is being added. To
72+
# properly fix this we need to install `portable_lib` into the correct path.
73+
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH ON)
5974
# ------------------------------ OPTIONS -------------------------------------
6075
# WARNING: Please don't add example specific options in this CMakeLists.txt.
6176
# Instead please use `find_package(executorch REQUIRED)` in the example
@@ -682,22 +697,6 @@ if(EXECUTORCH_BUILD_PTHREADPOOL
682697
endif()
683698

684699
if(EXECUTORCH_BUILD_PYBIND)
685-
# Setup RPATH.
686-
# See https://gitlab.kitware.com/cmake/community/-/wikis/doc/cmake/RPATH-handling
687-
if(APPLE)
688-
set(CMAKE_MACOSX_RPATH ON)
689-
set(_rpath_portable_origin "@loader_path")
690-
else()
691-
set(_rpath_portable_origin $ORIGIN)
692-
endif(APPLE)
693-
# Use separate rpaths during build and install phases
694-
set(CMAKE_SKIP_BUILD_RPATH FALSE)
695-
# Don't use the install-rpath during the build phase
696-
set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
697-
set(CMAKE_INSTALL_RPATH "${_rpath_portable_origin}")
698-
# Automatically add all linked folders that are NOT in the build directory to
699-
# the rpath (per library?)
700-
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
701700
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/pybind11)
702701

703702
if(NOT EXECUTORCH_BUILD_EXTENSION_DATA_LOADER)

backends/arm/_passes/arm_pass_manager.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@
4141
ScalarsToAttributePass,
4242
)
4343
from executorch.backends.arm._passes.size_adjust_conv2d_pass import SizeAdjustConv2DPass
44+
from executorch.backends.arm._passes.unsqueeze_before_repeat_pass import (
45+
UnsqueezeBeforeRepeatPass,
46+
)
4447
from executorch.backends.arm._passes.unsqueeze_scalar_placeholders_pass import (
4548
UnsqueezeScalarPlaceholdersPass,
4649
)
@@ -66,6 +69,7 @@ def transform_to_backend_pipeline(
6669
self.add_pass(RemoveClonePass())
6770
self.add_pass(ConvertExpandCopyToRepeatPass())
6871
self.add_pass(DecomposeLayerNormPass())
72+
self.add_pass(UnsqueezeBeforeRepeatPass())
6973
self.add_pass(DecomposeVarPass())
7074
self.add_pass(ConvertMeanDimToAveragePool())
7175
self.add_pass(DecomposeMeanDimPass())
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# Copyright 2024 Arm Limited and/or its affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
# pyre-unsafe
7+
import torch
8+
import torch.fx
9+
from executorch.backends.arm._passes.arm_pass_utils import (
10+
create_node,
11+
get_first_fake_tensor,
12+
)
13+
from executorch.exir.dialects._ops import ops as exir_ops
14+
from executorch.exir.pass_base import ExportPass, PassResult
15+
16+
17+
class UnsqueezeBeforeRepeatPass(ExportPass):
    """
    Make every edge-dialect ``repeat`` rank-preserving.

    A TOSA TILE op only supports rank(in) == rank(out), whereas PyTorch's
    repeat may also add dimensions. For each repeat whose ``multiples``
    has more entries than its input has dimensions, an explicit view op is
    inserted in front of it that supplies the missing dimensions.
    New dimensions are prepended at the front, see
    https://pytorch.org/docs/stable/generated/torch.Tensor.repeat.html

    Original:
        repeat(multiples)
    After pass:
        view(shape = [1]*num_new_dims + old_shape)
        repeat(multiples)
    """

    def call(self, graph_module: torch.fx.GraphModule):
        """
        Insert a ``view_copy`` before each repeat node that changes rank.

        Returns a ``PassResult`` holding the graph module and a flag telling
        whether any node was rewritten. The module is recompiled and passed
        through the parent ``call`` only when something changed.
        """
        graph = graph_module.graph
        repeat_op = exir_ops.edge.aten.repeat.default
        changed = False

        for node in graph.nodes:
            # Only call_function nodes targeting the edge repeat op matter.
            if node.op != "call_function" or node.target != repeat_op:
                continue

            source = node.all_input_nodes[0]
            input_shape = list(get_first_fake_tensor(source).shape)
            # node.args[1] is the `multiples` argument of repeat.
            missing_dims = len(node.args[1]) - len(input_shape)
            if missing_dims == 0:
                # Ranks already agree; nothing to insert.
                continue

            padded_shape = [1] * missing_dims + input_shape

            with graph.inserting_before(node):
                reshaped = create_node(
                    graph,
                    exir_ops.edge.aten.view_copy.default,
                    (source, padded_shape),
                )
                # Route the repeat through the new view instead of the raw input.
                node.replace_input_with(source, reshaped)
                changed = True

        if not changed:
            return PassResult(graph_module, False)

        graph_module.recompile()
        retraced = super().call(graph_module).graph_module
        return PassResult(retraced, True)

0 commit comments

Comments
 (0)