Skip to content

Commit bf41402

Browse files
author
pytorchbot
committed
2024-11-22 nightly release (fc42a4e)
1 parent 18e7b87 commit bf41402

File tree

66 files changed

+1596
-677
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

66 files changed

+1596
-677
lines changed

.ci/scripts/test_llama.sh

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,41 @@ set -exu
99
# shellcheck source=/dev/null
1010
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
1111

12-
MODEL_NAME=$1 # stories110M
13-
BUILD_TOOL=$2 # buck2 or cmake
14-
DTYPE=$3 # fp16, bf16, or fp32
15-
MODE=${4:-"xnnpack+custom"} # portable or xnnpack+custom or xnnpack+custom+qe
16-
UPLOAD_DIR=${5:-}
12+
while [[ $# -gt 0 ]]; do
13+
case "$1" in
14+
-model)
15+
MODEL_NAME="$2" # stories110M
16+
shift 2
17+
;;
18+
-build_tool)
19+
BUILD_TOOL="$2" # buck2 or cmake
20+
shift 2
21+
;;
22+
-dtype)
23+
DTYPE="$2" # fp16, bf16, or fp32
24+
shift 2
25+
;;
26+
-mode)
27+
MODE="$2" # portable or xnnpack+custom or xnnpack+custom+qe
28+
shift 2
29+
;;
30+
-upload)
31+
UPLOAD_DIR="$2"
32+
shift 2
33+
;;
34+
*)
35+
echo "Unknown option: $1"
36+
usage
37+
;;
38+
esac
39+
done
40+
41+
# Default mode to xnnpack+custom if not set
42+
MODE=${MODE:-"xnnpack+custom"}
43+
44+
# Default UPLOAD_DIR to empty string if not set
45+
UPLOAD_DIR="${UPLOAD_DIR:-}"
46+
1747
if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
1848
echo "Expecting at least 4 positional arguments"
1949
echo "Usage: [...]"
@@ -150,7 +180,7 @@ cleanup_files() {
150180
}
151181

152182
prepare_artifacts_upload() {
153-
if [ -n "$UPLOAD_DIR" ]; then
183+
if [ -n "${UPLOAD_DIR}" ]; then
154184
echo "Preparing for uploading generated artifacts"
155185
zip -j model.zip "${EXPORTED_MODEL_NAME}" tokenizer.bin
156186
mkdir -p "${UPLOAD_DIR}"

.github/workflows/pull.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ jobs:
117117
# Install requirements for export_llama
118118
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
119119
# Test llama2
120-
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}" "${ARTIFACTS_DIR_NAME}"
120+
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -upload "${ARTIFACTS_DIR_NAME}"
121121
122122
test-llama-runner-linux-android:
123123
name: test-llama-runner-linux-android
@@ -393,7 +393,7 @@ jobs:
393393
# Install requirements for export_llama
394394
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
395395
# Test llama2
396-
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
396+
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}"
397397
398398
test-phi-3-mini-runner-linux:
399399
name: test-phi-3-mini-runner-linux

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ jobs:
261261
# Install requirements for export_llama
262262
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
263263
# Test llama2
264-
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh stories110M cmake "${DTYPE}" "${MODE}"
264+
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh -model stories110M -build_tool cmake -dtype "${DTYPE}" -mode "${MODE}"
265265
266266
# # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner.
267267
# test-llava-runner-macos:

CONTRIBUTING.md

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -283,10 +283,15 @@ for basics.
283283
- If the reviewers have requests or questions, follow up with them.
284284
- The goal of the reviewer is to ensure that the code in the `main` branch of
285285
the repo is consistent, maintainable, and of high quality.
286-
1. Once approved, your reviewer will import the PR into Meta's internal system
287-
and merge it from there.
288-
- If the PR is approved and not merged within a few business days, please
289-
comment on the PR to ask about its status.
286+
1. Once the PR has been approved,
287+
- If you have the "write permission" in this repo, you can merge it yourself
288+
by clicking the "Squash and merge" button once it is green and all CI
289+
signals are passing.
290+
- If you don't have "write permission" in this repo, the reviewer will take
291+
care of the PR. The reviewer may import the PR into Meta's internal system
292+
to validate it against internal CI.
293+
- If the PR is approved but not merged within 5 business days, please comment
294+
on the PR to ask about its status.
290295
- Note that if the `main` [CI](#continuous-integration) jobs are broken, we
291296
will only merge PRs that fix the broken jobs until all critical jobs are
292297
fixed.

backends/arm/test/common.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ class arm_test_options(Enum):
2929
corstone300 = auto()
3030
dump_path = auto()
3131
date_format = auto()
32+
fast_fvp = auto()
3233

3334

3435
_test_options: dict[arm_test_options, Any] = {}
@@ -41,6 +42,7 @@ def pytest_addoption(parser):
4142
parser.addoption("--arm_run_corstone300", action="store_true")
4243
parser.addoption("--default_dump_path", default=None)
4344
parser.addoption("--date_format", default="%d-%b-%H:%M:%S")
45+
parser.addoption("--fast_fvp", action="store_true")
4446

4547

4648
def pytest_configure(config):
@@ -63,6 +65,7 @@ def pytest_configure(config):
6365
f"Supplied argument 'default_dump_path={dump_path}' that does not exist or is not a directory."
6466
)
6567
_test_options[arm_test_options.date_format] = config.option.date_format
68+
_test_options[arm_test_options.fast_fvp] = config.option.fast_fvp
6669
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
6770

6871

backends/arm/test/runner_utils.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
import numpy as np
1818
import torch
1919

20+
from executorch.backends.arm.test.common import arm_test_options, is_option_enabled
21+
2022
from torch.export import ExportedProgram
2123
from torch.fx.node import Node
2224

@@ -249,6 +251,10 @@ def run_corstone(
249251
for input_path in input_paths:
250252
cmd_line += f" -i {input_path}"
251253

254+
ethos_u_extra_args = ""
255+
if is_option_enabled(arm_test_options.fast_fvp):
256+
ethos_u_extra_args = ethos_u_extra_args + "--fast"
257+
252258
command_args = {
253259
"corstone-300": [
254260
"FVP_Corstone_SSE-300_Ethos-U55",
@@ -267,6 +273,8 @@ def run_corstone(
267273
"-C",
268274
"cpu0.semihosting-stack_base=0",
269275
"-C",
276+
f"ethosu.extra_args='{ethos_u_extra_args}'",
277+
"-C",
270278
"cpu0.semihosting-heap_limit=0",
271279
"-C",
272280
f"cpu0.semihosting-cmd_line='{cmd_line}'",
@@ -282,6 +290,8 @@ def run_corstone(
282290
"-C",
283291
"mps4_board.visualisation.disable-visualisation=1",
284292
"-C",
293+
"vis_hdlcd.disable_visualisation=1",
294+
"-C",
285295
"mps4_board.telnetterminal0.start_telnet=0",
286296
"-C",
287297
"mps4_board.uart0.out_file='-'",
@@ -296,6 +306,8 @@ def run_corstone(
296306
"-C",
297307
"mps4_board.subsystem.cpu0.semihosting-heap_limit=0",
298308
"-C",
309+
f"mps4_board.subsystem.ethosu.extra_args='{ethos_u_extra_args}'",
310+
"-C",
299311
f"mps4_board.subsystem.cpu0.semihosting-cmd_line='{cmd_line}'",
300312
"-a",
301313
elf_path,

backends/cadence/aot/functions.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,16 @@
7777
- arg_meta: null
7878
kernel_name: torch::executor::gelu_out
7979

80+
- op: hardtanh.out
81+
kernels:
82+
- arg_meta: null
83+
kernel_name: torch::executor::hardtanh_out
84+
85+
- op: max_pool2d_with_indices.out
86+
kernels:
87+
- arg_meta: null
88+
kernel_name: torch::executor::max_pool2d_with_indices_out
89+
8090
- op: mean.out
8191
kernels:
8292
- arg_meta: null

backends/cadence/aot/functions_hifi.yaml

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,26 @@
6262
- arg_meta: null
6363
kernel_name: torch::executor::full_out
6464

65+
- op: gelu.out
66+
kernels:
67+
- arg_meta: null
68+
kernel_name: torch::executor::gelu_out
69+
70+
- op: hardtanh.out
71+
kernels:
72+
- arg_meta: null
73+
kernel_name: torch::executor::hardtanh_out
74+
75+
- op: max_pool2d_with_indices.out
76+
kernels:
77+
- arg_meta: null
78+
kernel_name: torch::executor::max_pool2d_with_indices_out
79+
6580
- op: mean.out
6681
kernels:
6782
- arg_meta: null
68-
kernel_name: cadence::impl::HiFi::mean_dim_out
69-
83+
kernel_name: cadence::impl::HiFi::mean_dim_out
84+
7085
- op: mul.out
7186
kernels:
7287
- arg_meta: null

backends/cadence/aot/fuse_ops.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,9 @@ def fuse_quantized_batch_norm_with_conv(
426426
# Note: there is a quantized.conv2d.new operator in the resulting graph
427427
# that takes a torch.classes.quantized.Conv2dPackedParamsBase as one of the input
428428
# this prevents us to directly call graph_module.recompile().
429+
# pyre-fixme[16]: `GraphModule` has no attribute `_code`.
430+
# pyre-fixme[16]: Item `Tensor` of `Tensor | Module` has no attribute
431+
# `python_code`.
429432
graph_module._code = graph_module._graph.python_code(root_module="self").src
430433

431434
def __init__(self):

backends/cadence/aot/quantizer/patterns.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ def partition_types(self) -> List[OpOverload]:
7575
def get_anchors(
7676
self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
7777
) -> PartitionAnchors:
78+
# pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
7879
addmm_node = fused_partition[0].nodes[-1]
7980

8081
bias_qspec = DerivedQuantizationSpec(
@@ -107,6 +108,7 @@ def partition_types(self) -> List[OpOverload]:
107108
def get_anchors(
108109
self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
109110
) -> PartitionAnchors:
111+
# pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
110112
bmm_node = fused_partition[0].nodes[-1]
111113

112114
return PartitionAnchors(
@@ -127,6 +129,7 @@ def partition_types(self) -> List[OpOverload]:
127129
def get_anchors(
128130
self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
129131
) -> PartitionAnchors:
132+
# pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
130133
conv1d_node = fused_partition[0].nodes[-1]
131134

132135
bias_qspec = DerivedQuantizationSpec(
@@ -165,6 +168,7 @@ def partition_types(self) -> List[OpOverload]:
165168
def get_anchors(
166169
self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
167170
) -> PartitionAnchors:
171+
# pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
168172
conv2d_node = fused_partition[0].nodes[-1]
169173

170174
bias_qspec = DerivedQuantizationSpec(
@@ -203,6 +207,7 @@ def partition_types(self) -> List[OpOverload]:
203207
def get_anchors(
204208
self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
205209
) -> PartitionAnchors:
210+
# pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
206211
layer_norm_node = fused_partition[0].nodes[-1]
207212

208213
others = [(layer_norm_node, 1)]
@@ -237,6 +242,7 @@ def partition_types(self) -> List[OpOverload]:
237242
def get_anchors(
238243
self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
239244
) -> PartitionAnchors:
245+
# pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
240246
linear_node = fused_partition[0].nodes[-1]
241247

242248
bias_qspec = DerivedQuantizationSpec(
@@ -275,6 +281,7 @@ def partition_types(self) -> List[OpOverload]:
275281
def get_anchors(
276282
self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
277283
) -> PartitionAnchors:
284+
# pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
278285
matmul_node = fused_partition[0].nodes[-1]
279286

280287
return PartitionAnchors(
@@ -297,6 +304,7 @@ def partition_types(self) -> List[OpOverload]:
297304
def get_anchors(
298305
self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
299306
) -> PartitionAnchors:
307+
# pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
300308
relu_node = fused_partition[0].nodes[-1]
301309

302310
return PartitionAnchors(

0 commit comments

Comments
 (0)