Commit bf41402: 2024-11-22 nightly release (fc42a4e)
pytorchbot committed Nov 22, 2024 (1 parent: 18e7b87)
Showing 66 changed files with 1,596 additions and 677 deletions.
42 changes: 36 additions & 6 deletions .ci/scripts/test_llama.sh
@@ -9,11 +9,41 @@ set -exu
# shellcheck source=/dev/null
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

MODEL_NAME=$1 # stories110M
BUILD_TOOL=$2 # buck2 or cmake
DTYPE=$3 # fp16, bf16, or fp32
MODE=${4:-"xnnpack+custom"} # portable or xnnpack+custom or xnnpack+custom+qe
UPLOAD_DIR=${5:-}
while [[ $# -gt 0 ]]; do
  case "$1" in
    -model)
      MODEL_NAME="$2" # stories110M
      shift 2
      ;;
    -build_tool)
      BUILD_TOOL="$2" # buck2 or cmake
      shift 2
      ;;
    -dtype)
      DTYPE="$2" # fp16, bf16, or fp32
      shift 2
      ;;
    -mode)
      MODE="$2" # portable or xnnpack+custom or xnnpack+custom+qe
      shift 2
      ;;
    -upload)
      UPLOAD_DIR="$2"
      shift 2
      ;;
    *)
      echo "Unknown option: $1"
      usage
      ;;
  esac
done

# Default mode to xnnpack+custom if not set
MODE=${MODE:-"xnnpack+custom"}

# Default UPLOAD_DIR to empty string if not set
UPLOAD_DIR="${UPLOAD_DIR:-}"

if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
echo "Expecting atleast 4 positional arguments"
echo "Usage: [...]"
@@ -150,7 +180,7 @@ cleanup_files() {
}

prepare_artifacts_upload() {
if [ -n "$UPLOAD_DIR" ]; then
if [ -n "${UPLOAD_DIR}" ]; then
echo "Preparing for uploading generated artifacs"
zip -j model.zip "${EXPORTED_MODEL_NAME}" tokenizer.bin
mkdir -p "${UPLOAD_DIR}"
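For reference, the updated script is driven by named flags rather than positional arguments. A minimal sketch of a local run, assuming the llama example requirements are already installed and using a hypothetical upload directory (the CI invocations below use the same flags):

```bash
# Sketch of a local invocation of the updated script; flag names mirror the parser above.
# -upload is optional: when omitted, UPLOAD_DIR stays empty and no artifact zip is prepared.
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh \
  -model stories110M \
  -build_tool cmake \
  -dtype fp32 \
  -mode xnnpack+custom \
  -upload /tmp/llama_artifacts  # hypothetical output directory
```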
4 changes: 2 additions & 2 deletions .github/workflows/pull.yml
@@ -117,7 +117,7 @@ jobs:
# Install requirements for export_llama
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
# Test llama2
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}" "${ARTIFACTS_DIR_NAME}"
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -upload "${ARTIFACTS_DIR_NAME}"
test-llama-runner-linux-android:
name: test-llama-runner-linux-android
@@ -393,7 +393,7 @@ jobs:
# Install requirements for export_llama
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
# Test llama2
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}"
test-phi-3-mini-runner-linux:
name: test-phi-3-mini-runner-linux
2 changes: 1 addition & 1 deletion .github/workflows/trunk.yml
@@ -261,7 +261,7 @@ jobs:
# Install requirements for export_llama
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
# Test llama2
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh stories110M cmake "${DTYPE}" "${MODE}"
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh -model stories110M -build_tool cmake -dtype "${DTYPE}" -mode "${MODE}"
# # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner.
# test-llava-runner-macos:
13 changes: 9 additions & 4 deletions CONTRIBUTING.md
@@ -283,10 +283,15 @@ for basics.
- If the reviewers have requests or questions, follow up with them.
- The goal of the reviewer is to ensure that the code in the `main` branch of
the repo is consistent, maintainable, and of high quality.
1. Once approved, your reviewer will import the PR into Meta's internal system
and merge it from there.
- If the PR is approved and not merged within a few business days, please
comment on the PR to ask about its status.
1. Once the PR has been approved,
- If you have write permission in this repo, you can merge it yourself
by clicking the "Squash and merge" button once it is green and all CI
signals are passing.
- If you don't have write permission in this repo, the reviewer will take
care of the PR. The reviewer may import the PR into Meta's internal system
to validate it against internal CI.
- If the PR is approved but not merged within 5 business days, please comment
on the PR to ask about its status.
- Note that if the `main` [CI](#continuous-integration) jobs are broken, we
will only merge PRs that fix the broken jobs until all critical jobs are
fixed.
3 changes: 3 additions & 0 deletions backends/arm/test/common.py
@@ -29,6 +29,7 @@ class arm_test_options(Enum):
corstone300 = auto()
dump_path = auto()
date_format = auto()
fast_fvp = auto()


_test_options: dict[arm_test_options, Any] = {}
@@ -41,6 +42,7 @@ def pytest_addoption(parser):
parser.addoption("--arm_run_corstone300", action="store_true")
parser.addoption("--default_dump_path", default=None)
parser.addoption("--date_format", default="%d-%b-%H:%M:%S")
parser.addoption("--fast_fvp", action="store_true")


def pytest_configure(config):
@@ -63,6 +65,7 @@ def pytest_configure(config):
f"Supplied argument 'default_dump_path={dump_path}' that does not exist or is not a directory."
)
_test_options[arm_test_options.date_format] = config.option.date_format
_test_options[arm_test_options.fast_fvp] = config.option.fast_fvp
logging.basicConfig(level=logging.INFO, stream=sys.stdout)


12 changes: 12 additions & 0 deletions backends/arm/test/runner_utils.py
@@ -17,6 +17,8 @@
import numpy as np
import torch

from executorch.backends.arm.test.common import arm_test_options, is_option_enabled

from torch.export import ExportedProgram
from torch.fx.node import Node

@@ -249,6 +251,10 @@ def run_corstone(
for input_path in input_paths:
cmd_line += f" -i {input_path}"

ethos_u_extra_args = ""
if is_option_enabled(arm_test_options.fast_fvp):
ethos_u_extra_args = ethos_u_extra_args + "--fast"

command_args = {
"corstone-300": [
"FVP_Corstone_SSE-300_Ethos-U55",
@@ -267,6 +273,8 @@
"-C",
"cpu0.semihosting-stack_base=0",
"-C",
f"ethosu.extra_args='{ethos_u_extra_args}'",
"-C",
"cpu0.semihosting-heap_limit=0",
"-C",
f"cpu0.semihosting-cmd_line='{cmd_line}'",
@@ -282,6 +290,8 @@
"-C",
"mps4_board.visualisation.disable-visualisation=1",
"-C",
"vis_hdlcd.disable_visualisation=1",
"-C",
"mps4_board.telnetterminal0.start_telnet=0",
"-C",
"mps4_board.uart0.out_file='-'",
@@ -296,6 +306,8 @@
"-C",
"mps4_board.subsystem.cpu0.semihosting-heap_limit=0",
"-C",
f"mps4_board.subsystem.ethosu.extra_args='{ethos_u_extra_args}'",
"-C",
f"mps4_board.subsystem.cpu0.semihosting-cmd_line='{cmd_line}'",
"-a",
elf_path,
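Together with the new --fast_fvp option registered in common.py above, the fast simulation mode can be toggled from the pytest command line. A hedged sketch, assuming the Arm Corstone FVP binaries and toolchain are installed and on PATH, and that the suite lives under backends/arm/test:

```bash
# Sketch: run the Arm Corstone FVP tests with fast simulation enabled.
# When --fast_fvp is set, run_corstone passes ethosu.extra_args='--fast' to the
# FVP, which presumably trades timing fidelity for shorter test runs.
pytest backends/arm/test -v \
  --arm_run_corstone300 \
  --fast_fvp
```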
10 changes: 10 additions & 0 deletions backends/cadence/aot/functions.yaml
@@ -77,6 +77,16 @@
- arg_meta: null
kernel_name: torch::executor::gelu_out

- op: hardtanh.out
kernels:
- arg_meta: null
kernel_name: torch::executor::hardtanh_out

- op: max_pool2d_with_indices.out
kernels:
- arg_meta: null
kernel_name: torch::executor::max_pool2d_with_indices_out

- op: mean.out
kernels:
- arg_meta: null
19 changes: 17 additions & 2 deletions backends/cadence/aot/functions_hifi.yaml
@@ -62,11 +62,26 @@
- arg_meta: null
kernel_name: torch::executor::full_out

- op: gelu.out
kernels:
- arg_meta: null
kernel_name: torch::executor::gelu_out

- op: hardtanh.out
kernels:
- arg_meta: null
kernel_name: torch::executor::hardtanh_out

- op: max_pool2d_with_indices.out
kernels:
- arg_meta: null
kernel_name: torch::executor::max_pool2d_with_indices_out

- op: mean.out
kernels:
- arg_meta: null
kernel_name: cadence::impl::HiFi::mean_dim_out
kernel_name: cadence::impl::HiFi::mean_dim_out

- op: mul.out
kernels:
- arg_meta: null
3 changes: 3 additions & 0 deletions backends/cadence/aot/fuse_ops.py
@@ -426,6 +426,9 @@ def fuse_quantized_batch_norm_with_conv(
# Note: there is a quantized.conv2d.new operator in the resulting graph
# that takes a torch.classes.quantized.Conv2dPackedParamsBase as one of the inputs;
# this prevents us from directly calling graph_module.recompile().
# pyre-fixme[16]: `GraphModule` has no attribute `_code`.
# pyre-fixme[16]: Item `Tensor` of `Tensor | Module` has no attribute
# `python_code`.
graph_module._code = graph_module._graph.python_code(root_module="self").src

def __init__(self):
8 changes: 8 additions & 0 deletions backends/cadence/aot/quantizer/patterns.py
@@ -75,6 +75,7 @@ def partition_types(self) -> List[OpOverload]:
def get_anchors(
self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
) -> PartitionAnchors:
# pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
addmm_node = fused_partition[0].nodes[-1]

bias_qspec = DerivedQuantizationSpec(
@@ -107,6 +108,7 @@ def partition_types(self) -> List[OpOverload]:
def get_anchors(
self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
) -> PartitionAnchors:
# pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
bmm_node = fused_partition[0].nodes[-1]

return PartitionAnchors(
@@ -127,6 +129,7 @@ def partition_types(self) -> List[OpOverload]:
def get_anchors(
self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
) -> PartitionAnchors:
# pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
conv1d_node = fused_partition[0].nodes[-1]

bias_qspec = DerivedQuantizationSpec(
@@ -165,6 +168,7 @@ def partition_types(self) -> List[OpOverload]:
def get_anchors(
self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
) -> PartitionAnchors:
# pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
conv2d_node = fused_partition[0].nodes[-1]

bias_qspec = DerivedQuantizationSpec(
@@ -203,6 +207,7 @@ def partition_types(self) -> List[OpOverload]:
def get_anchors(
self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
) -> PartitionAnchors:
# pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
layer_norm_node = fused_partition[0].nodes[-1]

others = [(layer_norm_node, 1)]
@@ -237,6 +242,7 @@ def partition_types(self) -> List[OpOverload]:
def get_anchors(
self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
) -> PartitionAnchors:
# pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
linear_node = fused_partition[0].nodes[-1]

bias_qspec = DerivedQuantizationSpec(
@@ -275,6 +281,7 @@ def partition_types(self) -> List[OpOverload]:
def get_anchors(
self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
) -> PartitionAnchors:
# pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
matmul_node = fused_partition[0].nodes[-1]

return PartitionAnchors(
@@ -297,6 +304,7 @@ def partition_types(self) -> List[OpOverload]:
def get_anchors(
self, gm: fx.GraphModule, fused_partition: List[fx.GraphModule]
) -> PartitionAnchors:
# pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
relu_node = fused_partition[0].nodes[-1]

return PartitionAnchors(
3 changes: 3 additions & 0 deletions backends/cadence/hifi/operators/CMakeLists.txt
@@ -33,6 +33,9 @@ set(_aten_ops__srcs
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_clone.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_embedding.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_full.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_gelu.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_hardtanh.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_max_pool2d_with_indices.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_permute_copy.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_slice_copy.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_softmax.cpp"
2 changes: 2 additions & 0 deletions backends/cadence/reference/operators/CMakeLists.txt
@@ -39,6 +39,8 @@ set(_aten_ops__srcs
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_cat.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_clone.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_div.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_hardtanh.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_max_pool2d_with_indices.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_mean.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_mul.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_permute_copy.cpp"
@@ -73,7 +73,11 @@ def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
)
out_to_dim_node_user.args = tuple(out_to_dim_node_user_new_args)

# pyre-fixme[29]: `Union[torch._tensor.Tensor,
# torch.nn.modules.module.Module]` is not a function.
graph_module.erase_node(out_to_dim_node)
# pyre-fixme[29]: `Union[torch._tensor.Tensor,
# torch.nn.modules.module.Module]` is not a function.
graph_module.erase_node(node)
# TODO: Handle other merging rules, including 1->N, N->1, N->N
return PassResult(graph_module, True)
2 changes: 2 additions & 0 deletions backends/qualcomm/runtime/QnnExecuTorchBackend.cpp
@@ -90,6 +90,8 @@ Result<DelegateHandle*> QnnExecuTorchBackend::init(
}
}
add_cached_delegate(signature, qnn_manager);
// This backend does not need its processed data after Init.
processed->Free();
return qnn_manager;
}

1 change: 1 addition & 0 deletions backends/qualcomm/serialization/qc_compiler_spec.fbs
@@ -36,6 +36,7 @@ enum QcomChipset: int {
SM8550 = 43,
SSG2115P = 46,
SM8650 = 57,
SA8295 = 39,
}

/// Indicate the information of the specified SoC.
2 changes: 2 additions & 0 deletions backends/qualcomm/serialization/qc_schema.py
@@ -41,6 +41,7 @@ class QcomChipset(IntEnum):
SM8550 = 43 # v73
SSG2115P = 46 # v73
SM8650 = 57 # v75
SA8295 = 39 # v68


@dataclass
@@ -55,6 +56,7 @@ class SocInfo:
QcomChipset.SM8550: SocInfo(QcomChipset.SM8550, HtpInfo(HtpArch.V73, 8)),
QcomChipset.SM8650: SocInfo(QcomChipset.SM8650, HtpInfo(HtpArch.V75, 8)),
QcomChipset.SSG2115P: SocInfo(QcomChipset.SSG2115P, HtpInfo(HtpArch.V73, 2)),
QcomChipset.SA8295: SocInfo(QcomChipset.SA8295, HtpInfo(HtpArch.V68, 8)),
}


2 changes: 2 additions & 0 deletions backends/qualcomm/utils/utils.py
@@ -967,6 +967,7 @@ def get_soc_to_arch_map():
"SM8550": HtpArch.V73,
"SM8475": HtpArch.V69,
"SM8450": HtpArch.V69,
"SA8295": HtpArch.V68,
}


@@ -977,6 +978,7 @@ def get_soc_to_chipset_map():
"SM8550": QcomChipset.SM8550,
"SM8475": QcomChipset.SM8475,
"SM8450": QcomChipset.SM8450,
"SA8295": QcomChipset.SA8295,
}


1 change: 1 addition & 0 deletions backends/vulkan/_passes/int4_weight_only_quantizer.py
@@ -136,6 +136,7 @@ def _vk_replace_linear_int4(
scales_precision=scales_precision,
)
if copy_weights and child.weight.device != torch.device("meta"):
# pyre-fixme[16]: `Module` has no attribute `weight`.
new_linear.weight = child.weight
setattr(module, name, new_linear)
else:
8 changes: 4 additions & 4 deletions backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl
@@ -117,10 +117,10 @@ VEC4_T q_8w_linear(const u16vec3 out_pos, const uint16_t K) {

void main() {
const u16vec3 out_pos = u16vec3(
gl_GlobalInvocationID.x / (out_limits.y * out_limits.z),
(gl_GlobalInvocationID.x / out_limits.z) % out_limits.y,
gl_GlobalInvocationID.x % out_limits.z);
if (any(greaterThanEqual(out_pos, out_limits))) {
gl_GlobalInvocationID.x / out_limits.y,
gl_GlobalInvocationID.x % out_limits.y,
0);
if (out_pos.x >= out_limits.x) {
return;
}
