2024-11-03 nightly release (97a4600)

pytorchbot committed Nov 3, 2024
1 parent 848beb5 commit 480c4b5

Showing 9 changed files with 160 additions and 38 deletions.
2 changes: 1 addition & 1 deletion .ci/docker/ci_commit_pins/pytorch.txt
@@ -1 +1 @@
-e47e8794499a4a0130ff4efb8713ff93f4b40c36
+c8a648d4dffb9f0133ff4a2ea0e660b42105d3ad
4 changes: 4 additions & 0 deletions backends/cadence/aot/functions.yaml
@@ -154,6 +154,10 @@
kernels:
- arg_meta: null
kernel_name: impl::reference::quantized_layer_norm_out
- func: cadence::quantized_layer_norm.per_tensor_out(Tensor input, float in_scale, int in_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor(a!)
kernels:
- arg_meta: null
kernel_name: impl::reference::quantized_layer_norm_per_tensor_out

- func: cadence::quantized_linear.out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, Tensor weight_zero_point, Tensor out_multiplier, Tensor out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
kernels:
4 changes: 4 additions & 0 deletions backends/cadence/aot/functions_hifi.yaml
@@ -125,6 +125,10 @@
kernels:
- arg_meta: null
kernel_name: cadence::impl::HiFi::quantized_layer_norm_out
- func: cadence::quantized_layer_norm.per_tensor_out(Tensor input, float in_scale, int in_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor(a!)
kernels:
- arg_meta: null
kernel_name: cadence::impl::HiFi::quantized_layer_norm_per_tensor_out

- func: cadence::quantized_linear.out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, Tensor weight_zero_point, Tensor out_multiplier, Tensor out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
kernels:
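Both YAML entries above (reference and HiFi) bind kernels to the same new schema, cadence::quantized_layer_norm.per_tensor_out, which replaces the tensor-valued in_scale/in_zero_point of the existing overload with plain scalars. A minimal sketch of how the schema maps to a Python-side call, with hypothetical shapes and quantization parameters (whether an eager implementation is actually registered depends on the build):

import torch

# Hypothetical values; assumes the cadence ops have been registered,
# e.g. by importing executorch.backends.cadence.aot.ops_registrations.
x = torch.randint(0, 255, (2, 8), dtype=torch.uint8)
weight = torch.ones(8)
bias = torch.zeros(8)
out = torch.empty_like(x)

# Per-tensor overload: scale and zero point are scalars, not Tensors.
torch.ops.cadence.quantized_layer_norm.per_tensor_out(
    x,
    0.1,    # in_scale (float; a Tensor in the original overload)
    128,    # in_zero_point (int; a Tensor in the original overload)
    [8],    # normalized_shape
    weight,
    bias,
    1e-5,   # eps
    0.1,    # output_scale
    128,    # output_zero_point
    out=out,
)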
21 changes: 21 additions & 0 deletions backends/cadence/aot/ops_registrations.py
@@ -36,6 +36,12 @@
lib.define(
"quantized_layer_norm.out(Tensor X, Tensor X_scale, Tensor X_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor (a!)"
)
lib.define(
"quantized_layer_norm.per_tensor(Tensor X, float X_scale, int X_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point) -> (Tensor Y)"
)
lib.define(
"quantized_layer_norm.per_tensor_out(Tensor X, float X_scale, int X_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor (a!)"
)

lib.define(
"quantized_linear(Tensor src, Tensor weight, Tensor bias, int src_zero_point, Tensor weight_zero_point, Tensor out_multiplier, Tensor out_shift, int out_zero_point, Tensor? offset) -> (Tensor Z)"
@@ -180,6 +186,21 @@ def quantized_layer_norm_meta(
return input.new_empty(input.size(), dtype=input.dtype)


@register_fake("cadence::quantized_layer_norm.per_tensor")
def quantized_layer_norm_per_tensor_meta(
input: torch.Tensor,
X_scale: float,
X_zero_point: int,
normalized_shape: int,
weight: torch.Tensor,
bias: torch.Tensor,
eps: float,
output_scale: float,
output_zero_point: int,
) -> torch.Tensor:
return input.new_empty(input.size(), dtype=input.dtype)


@register_fake("cadence::quantized_relu")
def quantized_relu_meta(
X: torch.Tensor,
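The register_fake entry gives the new overload a meta ("fake") kernel, so the exporter can infer output shapes and dtypes without running a real implementation. A small sketch of what that enables, assuming the cadence library and the fake kernel above are registered, with hypothetical shapes:

import torch
from torch._subclasses.fake_tensor import FakeTensorMode

# Under FakeTensorMode no real kernel runs; the meta function above
# just allocates an empty output with the input's size and dtype.
with FakeTensorMode():
    x = torch.empty(2, 8, dtype=torch.int8)
    w = torch.empty(8)
    b = torch.empty(8)
    y = torch.ops.cadence.quantized_layer_norm.per_tensor(
        x, 0.1, 0, [8], w, b, 1e-5, 0.1, 0
    )
    assert y.shape == x.shape and y.dtype == x.dtype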
44 changes: 41 additions & 3 deletions backends/cadence/hifi/operators/quantized_layer_norm.cpp
@@ -27,7 +27,7 @@ namespace native {
// Compute quantized layer_norm. The current implementation assumes that the
// input is per-tensor quantized.
template <typename T>
void quantized_layer_norm_(
void quantized_layer_norm_per_tensor_(
const Tensor& input,
float input_scale,
int64_t input_zero_point,
@@ -107,7 +107,7 @@ void quantized_layer_norm_(
int64_t input_zero_point = in_zero_point.const_data_ptr<int64_t>()[0];

// Call other overload
quantized_layer_norm_<T>(
quantized_layer_norm_per_tensor_<T>(
input,
input_scale,
input_zero_point,
@@ -120,7 +120,7 @@
}

void quantized_layer_norm_out(
KernelRuntimeContext& ctx,
__ET_UNUSED KernelRuntimeContext& ctx,
const Tensor& input,
const Tensor& in_scale,
const Tensor& in_zero_point,
@@ -157,6 +157,44 @@ void quantized_layer_norm_out(
#undef typed_quantized_layer_norm
}

void quantized_layer_norm_per_tensor_out(
__ET_UNUSED KernelRuntimeContext& ctx,
const Tensor& input,
double in_scale,
int64_t in_zero_point,
__ET_UNUSED const IntArrayRef normalized_shape,
const Tensor& weight,
const Tensor& bias,
double eps,
double output_scale,
int64_t output_zero_point,
Tensor& out) {
#define typed_quantized_layer_norm(ctype, dtype) \
case ScalarType::dtype: { \
quantized_layer_norm_per_tensor_<ctype>( \
input, \
in_scale, \
in_zero_point, \
weight, \
bias, \
eps, \
output_scale, \
output_zero_point, \
out); \
break; \
}

ScalarType dtype = input.scalar_type();
switch (dtype) {
ET_FORALL_CADENCE_QUANTIZED_TYPES(typed_quantized_layer_norm)
default:
ET_DCHECK_MSG(
false, "Unhandled dtype %s", torch::executor::toString(dtype));
}

#undef typed_quantized_layer_norm
}

}; // namespace native
}; // namespace HiFi
}; // namespace impl
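For context, the computation these kernels implement (per the comment above, and assuming standard affine per-tensor quantization; a sketch, not lifted from the kernel body) is dequantize, layer-normalize, requantize:

x_i = s_{in} (q_i - z_{in}), \qquad
y_i = \frac{x_i - \mu}{\sqrt{\sigma^2 + \epsilon}} \, w_i + b_i, \qquad
q_i^{out} = \operatorname{round}(y_i / s_{out}) + z_{out}

where (s_{in}, z_{in}) and (s_{out}, z_{out}) are the scalar input/output scale and zero point, and \mu, \sigma^2 are the mean and variance over the normalized dimensions.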
58 changes: 51 additions & 7 deletions backends/cadence/reference/operators/quantized_layer_norm.cpp
@@ -11,9 +11,11 @@

#include <cmath>

using executorch::aten::Tensor;
using executorch::runtime::getLeadingDims;
using executorch::runtime::KernelRuntimeContext;
using ::executorch::aten::IntArrayRef;
using ::executorch::aten::ScalarType;
using ::executorch::aten::Tensor;
using ::executorch::runtime::getLeadingDims;
using ::executorch::runtime::KernelRuntimeContext;

namespace impl {
namespace reference {
@@ -22,7 +24,7 @@ namespace native {
// Compute quantized layer_norm. The current implementation assumes that the
// input is per-tensor quantized.
template <typename T>
void quantized_layer_norm_(
void quantized_layer_norm_per_tensor_(
const Tensor& input,
double input_scale,
int64_t input_zero_point,
@@ -98,7 +100,7 @@ void quantized_layer_norm_(
int64_t input_zero_point = in_zero_point.const_data_ptr<int64_t>()[0];

// Call other overload
quantized_layer_norm_<T>(
quantized_layer_norm_per_tensor_<T>(
input,
input_scale,
input_zero_point,
@@ -111,11 +113,11 @@
}

void quantized_layer_norm_out(
KernelRuntimeContext& ctx,
__ET_UNUSED KernelRuntimeContext& ctx,
const Tensor& input,
const Tensor& in_scale,
const Tensor& in_zero_point,
const executorch::aten::IntArrayRef normalized_shape,
__ET_UNUSED const executorch::aten::IntArrayRef normalized_shape,
const Tensor& weight,
const Tensor& bias,
double eps,
@@ -152,6 +154,48 @@ void quantized_layer_norm_out(
}
}

void quantized_layer_norm_per_tensor_out(
__ET_UNUSED KernelRuntimeContext& ctx,
const Tensor& input,
double in_scale,
int64_t in_zero_point,
__ET_UNUSED const executorch::aten::IntArrayRef normalized_shape,
const Tensor& weight,
const Tensor& bias,
double eps,
double output_scale,
int64_t output_zero_point,
Tensor& out) {
if (input.scalar_type() == executorch::aten::ScalarType::Byte) {
quantized_layer_norm_per_tensor_<uint8_t>(
input,
in_scale,
in_zero_point,
weight,
bias,
eps,
output_scale,
output_zero_point,
out);
} else if (input.scalar_type() == executorch::aten::ScalarType::Char) {
quantized_layer_norm_per_tensor_<int8_t>(
input,
in_scale,
in_zero_point,
weight,
bias,
eps,
output_scale,
output_zero_point,
out);
} else {
ET_CHECK_MSG(
false,
"Unhandled input dtype %hhd",
static_cast<int8_t>(input.scalar_type()));
}
}

}; // namespace native
}; // namespace reference
}; // namespace impl
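A Python sketch of what quantized_layer_norm_per_tensor_out computes, approximated for illustration (the clamping behavior and rounding mode are assumptions, not read off the kernel):

import torch

def quantized_layer_norm_per_tensor_ref(
    q_in: torch.Tensor,        # uint8 or int8 quantized input
    in_scale: float,
    in_zero_point: int,
    normalized_shape: list,
    weight: torch.Tensor,
    bias: torch.Tensor,
    eps: float,
    output_scale: float,
    output_zero_point: int,
) -> torch.Tensor:
    # Dequantize with the per-tensor scale/zero point.
    x = (q_in.to(torch.float32) - in_zero_point) * in_scale
    # Standard float layer norm.
    y = torch.nn.functional.layer_norm(x, normalized_shape, weight, bias, eps)
    # Requantize back into the input's integer dtype.
    q = torch.round(y / output_scale) + output_zero_point
    info = torch.iinfo(q_in.dtype)
    return q.clamp(info.min, info.max).to(q_in.dtype)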
25 changes: 9 additions & 16 deletions examples/models/llama3_2_vision/preprocess/export_preprocess.py
@@ -24,29 +24,22 @@ def main():
strict=False,
)

# Executorch
# AOTInductor. Note: export AOTI before ExecuTorch, as
# ExecuTorch will modify the ExportedProgram.
torch._inductor.aot_compile(
ep.module(),
model.get_example_inputs(),
options={"aot_inductor.output_path": "preprocess_aoti.so"},
)

# Executorch.
edge_program = to_edge(
ep, compile_config=EdgeCompileConfig(_check_ir_validity=False)
)
et_program = edge_program.to_executorch()
with open("preprocess_et.pte", "wb") as file:
et_program.write_to_file(file)

# Export.
# ep = torch.export.export(
# model.get_eager_model(),
# model.get_example_inputs(),
# dynamic_shapes=model.get_dynamic_shapes(),
# strict=False,
# )
#
# # AOTInductor
# torch._inductor.aot_compile(
# ep.module(),
# model.get_example_inputs(),
# options={"aot_inductor.output_path": "preprocess_aoti.so"},
# )


if __name__ == "__main__":
main()
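The reordering matters because to_edge/to_executorch mutate the ExportedProgram, so AOTInductor has to compile it first. A sketch of consuming the two artifacts this script writes (the .so loader matches the API used elsewhere in this commit; the .pte loader assumes the ExecuTorch pybindings extension is built):

import torch

# Load and run the AOTInductor artifact eagerly on CPU.
aoti_model = torch._export.aot_load("preprocess_aoti.so", "cpu")

# Load the ExecuTorch program (assumes the pybindings extension
# exists in this build).
from executorch.extension.pybindings.portable_lib import _load_for_executorch
et_module = _load_for_executorch("preprocess_et.pte")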
38 changes: 28 additions & 10 deletions examples/models/llama3_2_vision/preprocess/test_preprocess.py
@@ -26,6 +26,7 @@
)

from PIL import Image
from torch._inductor.package import package_aoti

from torchtune.models.clip.inference._transform import CLIPImageTransform

@@ -55,31 +56,46 @@ def initialize_models(resize_to_max_canvas: bool) -> Dict[str, Any]:
possible_resolutions=None,
)

# Eager model.
model = CLIPImageTransformModel(config)

# Exported model.
exported_model = torch.export.export(
model.get_eager_model(),
model.get_example_inputs(),
dynamic_shapes=model.get_dynamic_shapes(),
strict=False,
)

# aoti_path = torch._inductor.aot_compile(
# exported_model.module(),
# model.get_example_inputs(),
# )
# AOTInductor model.
so = torch._export.aot_compile(
exported_model.module(),
args=model.get_example_inputs(),
options={"aot_inductor.package": True},
dynamic_shapes=model.get_dynamic_shapes(),
)
aoti_path = "preprocess.pt2"
package_aoti(aoti_path, so)

edge_program = to_edge(
exported_model, compile_config=EdgeCompileConfig(_check_ir_validity=False)
)
executorch_model = edge_program.to_executorch()

# Re-export as ExecuTorch edits the ExportedProgram.
exported_model = torch.export.export(
model.get_eager_model(),
model.get_example_inputs(),
dynamic_shapes=model.get_dynamic_shapes(),
strict=False,
)

return {
"config": config,
"reference_model": reference_model,
"model": model,
"exported_model": exported_model,
# "aoti_path": aoti_path,
"aoti_path": aoti_path,
"executorch_model": executorch_model,
}

@@ -265,11 +281,13 @@ def run_preprocess(
), f"Executorch model: expected {reference_ar} but got {et_ar.tolist()}"

# Run aoti model and check it matches reference model.
# aoti_path = models["aoti_path"]
# aoti_model = torch._export.aot_load(aoti_path, "cpu")
# aoti_image, aoti_ar = aoti_model(image_tensor, inscribed_size, best_resolution)
# self.assertTrue(torch.allclose(reference_image, aoti_image))
# self.assertEqual(reference_ar, aoti_ar.tolist())
aoti_path = models["aoti_path"]
aoti_model = torch._inductor.aoti_load_package(aoti_path)
aoti_image, aoti_ar = aoti_model(image_tensor, inscribed_size, best_resolution)
assert_expected(aoti_image, reference_image, rtol=0, atol=1e-4)
assert (
reference_ar == aoti_ar.tolist()
), f"AOTI model: expected {reference_ar} but got {aoti_ar.tolist()}"

# This test setup mirrors the one in torchtune:
# https://github.com/pytorch/torchtune/blob/main/tests/torchtune/models/clip/test_clip_image_transform.py
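The test now exercises the packaged-AOTI path end to end: compile with aot_inductor.package=True, wrap the resulting artifact with package_aoti, and reload it with aoti_load_package. Reduced to its essentials, with a stand-in model (the real test uses the CLIP image transform):

import torch
from torch._inductor.package import package_aoti

# Stand-ins for the CLIP image transform and its example inputs.
model = torch.nn.Linear(4, 4).eval()
example_inputs = (torch.randn(2, 4),)

ep = torch.export.export(model, example_inputs, strict=False)
so = torch._export.aot_compile(
    ep.module(), args=example_inputs, options={"aot_inductor.package": True}
)
package_aoti("model.pt2", so)

loaded = torch._inductor.aoti_load_package("model.pt2")
out = loaded(*example_inputs)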
2 changes: 1 addition & 1 deletion install_requirements.py
@@ -112,7 +112,7 @@ def python_is_compatible():
# NOTE: If a newly-fetched version of the executorch repo changes the value of
# NIGHTLY_VERSION, you should re-run this script to install the necessary
# package versions.
NIGHTLY_VERSION = "dev20241030"
NIGHTLY_VERSION = "dev20241101"

# The pip repository that hosts nightly torch packages.
TORCH_NIGHTLY_URL = "https://download.pytorch.org/whl/nightly/cpu"
