Arm backend: enable dim_order (#7952)

pytorchbot · oscarandersson8218 · web-flow · commit 95a0ef6f4394 · 2025-01-27T13:12:00.000-05:00
Arm backend: enable dim_order (#7831) Add support for to_dim_order_copy With edge_compile_config.skip_dim_order = True removed, to_copy will be converted into to_dim_order_copy nodes. This commit moves our logic from to_copy into to_dim_order_copy. Signed-off-by: Oscar Andersson <oscar.andersson@arm.com> (cherry picked from commit 135e875) Co-authored-by: Oscar Andersson <87121123+oscarandersson8218@users.noreply.github.com>
diff --git a/backends/arm/operator_support/to_copy_support.py b/backends/arm/operator_support/to_copy_support.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -22,7 +22,10 @@
 
 @register_tosa_support_check
 class ToCopySupported(SupportedTOSAOperatorCheck):
-    targets = [exir_ops.edge.aten._to_copy.default]
+    targets = [
+        exir_ops.edge.aten._to_copy.default,
+        exir_ops.edge.dim_order_ops._to_dim_order_copy.default,
+    ]
 
     tosa_specs = [
         TosaSpecification.create_from_string("TOSA-0.80.0+BI"),
@@ -110,7 +113,7 @@ def is_node_supported(self, node: fx.Node, tosa_spec: TosaSpecification) -> bool
             )
             return False
 
-        # Check memory format
+        # Check memory format (to_copy)
         if "memory_format" in node.kwargs:
             if node.kwargs["memory_format"] in (torch.preserve_format,):
                 logger.info(
@@ -119,4 +122,14 @@ def is_node_supported(self, node: fx.Node, tosa_spec: TosaSpecification) -> bool
                 )
                 return False
 
+        # Check dim_order (to_dim_order_copy)
+        if "dim_order" in node.kwargs:
+            dim_order = node.kwargs["dim_order"]
+            if dim_order != list(range(len(dim_order))):
+                logger.info(
+                    f"Argument {dim_order=} is not supported for "
+                    f"{node.target.name()} right now."  # pyre-ignore[16]
+                )
+                return False
+
         return True
diff --git a/backends/arm/operators/__init__.py b/backends/arm/operators/__init__.py
@@ -39,6 +39,7 @@
     op_sum,
     op_tanh,
     op_to_copy,
+    op_to_dim_order_copy,
     op_transpose,
     op_unsqueeze,
     op_upsample_nearest2d,
diff --git a/backends/arm/operators/op_to_dim_order_copy.py b/backends/arm/operators/op_to_dim_order_copy.py
@@ -0,0 +1,40 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+from typing import List
+
+import serializer.tosa_serializer as ts
+import torch
+import tosa.Op as TosaOp
+
+from executorch.backends.arm.operators.node_visitor import (
+    NodeVisitor,
+    register_node_visitor,
+)
+from executorch.backends.arm.tosa_mapping import TosaArg
+
+
+@register_node_visitor
+class ToDimOrderCopyVisitor(NodeVisitor):
+    """
+    Implement the type cast functionality of _to_dim_order_copy.
+
+    Other features like setting of the dim_order or moving a tensor to a
+    different device are not supported.
+
+    Also note that the node should not be quantized.
+    """
+
+    target = "dim_order_ops._to_dim_order_copy.default"
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        tosa_graph: ts.TosaSerializer,
+        inputs: List[TosaArg],
+        output: TosaArg,
+    ) -> None:
+        tosa_graph.addOperator(TosaOp.Op().CAST, [inputs[0].name], [output.name])
diff --git a/backends/arm/test/models/test_mobilenet_v2_arm.py b/backends/arm/test/models/test_mobilenet_v2_arm.py
@@ -12,7 +12,6 @@
 from executorch.backends.arm.test import common, conftest
 
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
-from executorch.exir import EdgeCompileConfig
 from torchvision import models, transforms
 from torchvision.models.mobilenetv2 import MobileNet_V2_Weights
 
@@ -45,10 +44,6 @@ class TestMobileNetV2(unittest.TestCase):
         "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default",
     }
 
-    _edge_compile_config: EdgeCompileConfig = EdgeCompileConfig(
-        _skip_dim_order=True,  # TODO(T182928844): Delegate dim order op to backend.
-    )
-
     def test_mv2_tosa_MI(self):
         (
             ArmTester(
@@ -59,7 +54,7 @@ def test_mv2_tosa_MI(self):
                 ),
             )
             .export()
-            .to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)
+            .to_edge_transform_and_lower()
             .to_executorch()
             .run_method_and_compare_outputs(inputs=self.model_inputs)
         )
@@ -75,7 +70,7 @@ def test_mv2_tosa_BI(self):
             )
             .quantize()
             .export()
-            .to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)
+            .to_edge_transform_and_lower()
             .to_executorch()
             # atol=1.0 is a defensive upper limit
             # TODO MLETROCH-72
@@ -92,7 +87,7 @@ def test_mv2_u55_BI(self):
             )
             .quantize()
             .export()
-            .to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)
+            .to_edge_transform_and_lower()
             .to_executorch()
             .serialize()
         )
@@ -110,7 +105,7 @@ def test_mv2_u85_BI(self):
             )
             .quantize()
             .export()
-            .to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)
+            .to_edge_transform_and_lower()
             .to_executorch()
             .serialize()
         )
diff --git a/backends/arm/test/ops/test_add.py b/backends/arm/test/ops/test_add.py
@@ -12,7 +12,6 @@
 import torch
 from executorch.backends.arm.test import common, conftest
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
-from executorch.exir import EdgeCompileConfig
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from parameterized import parameterized
 
@@ -50,10 +49,6 @@ def __init__(self):
         def forward(self, x, y):
             return x + y
 
-    _edge_compile_config: EdgeCompileConfig = EdgeCompileConfig(
-        _skip_dim_order=True,  # TODO(T182928844): Delegate dim order op to backend.
-    )
-
     def _test_add_tosa_MI_pipeline(
         self, module: torch.nn.Module, test_data: Tuple[torch.Tensor]
     ):
@@ -66,7 +61,7 @@ def _test_add_tosa_MI_pipeline(
             .export()
             .check_count({"torch.ops.aten.add.Tensor": 1})
             .check_not(["torch.ops.quantized_decomposed"])
-            .to_edge(config=self._edge_compile_config)
+            .to_edge()
             .partition()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
@@ -86,7 +81,7 @@ def _test_add_tosa_BI_pipeline(
             .export()
             .check_count({"torch.ops.aten.add.Tensor": 1})
             .check(["torch.ops.quantized_decomposed"])
-            .to_edge(config=self._edge_compile_config)
+            .to_edge()
             .partition()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
diff --git a/backends/arm/test/ops/test_linear.py b/backends/arm/test/ops/test_linear.py
@@ -14,7 +14,6 @@
 from executorch.backends.arm.test import common, conftest
 
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
-from executorch.exir import EdgeCompileConfig
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from parameterized import parameterized
 
@@ -106,10 +105,6 @@
 class TestLinear(unittest.TestCase):
     """tests the linear operation y = Ax + b"""
 
-    _edge_compile_config: EdgeCompileConfig = EdgeCompileConfig(
-        _skip_dim_order=True,  # TODO(T182928844): Delegate dim order op to backend.
-    )
-
     class Linear(torch.nn.Module):
         def __init__(
             self,
@@ -141,7 +136,7 @@ def _test_linear_tosa_MI_pipeline(
             .export()
             .check_count({"torch.ops.aten.linear.default": 1})
             .check_not(["torch.ops.quantized_decomposed"])
-            .to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)
+            .to_edge_transform_and_lower()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
             .run_method_and_compare_outputs(inputs=test_data)
@@ -162,7 +157,7 @@ def _test_linear_tosa_BI_pipeline(
             .export()
             .check_count({"torch.ops.aten.linear.default": 1})
             .check(["torch.ops.quantized_decomposed"])
-            .to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)
+            .to_edge_transform_and_lower()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
             .run_method_and_compare_outputs(inputs=test_data, qtol=1)
@@ -184,7 +179,7 @@ def _test_linear_tosa_ethosu_BI_pipeline(
             .export()
             .check_count({"torch.ops.aten.linear.default": 1})
             .check(["torch.ops.quantized_decomposed"])
-            .to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)
+            .to_edge_transform_and_lower()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
             .serialize()
diff --git a/backends/arm/test/ops/test_maximum.py b/backends/arm/test/ops/test_maximum.py
@@ -12,7 +12,6 @@
 import torch
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
-from executorch.exir import EdgeCompileConfig
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from parameterized import parameterized
 
@@ -38,10 +37,6 @@ def __init__(self):
         def forward(self, x, y):
             return torch.maximum(x, y)
 
-    _edge_compile_config: EdgeCompileConfig = EdgeCompileConfig(
-        _skip_dim_order=True,  # TODO(T182928844): Delegate dim order op to backend.
-    )
-
     def _test_maximum_tosa_MI_pipeline(
         self, module: torch.nn.Module, test_data: Tuple[torch.Tensor]
     ):
@@ -54,7 +49,7 @@ def _test_maximum_tosa_MI_pipeline(
             .export()
             .check_count({"torch.ops.aten.maximum.default": 1})
             .check_not(["torch.ops.quantized_decomposed"])
-            .to_edge(config=self._edge_compile_config)
+            .to_edge()
             .partition()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
@@ -74,7 +69,7 @@ def _test_maximum_tosa_BI_pipeline(
             .export()
             .check_count({"torch.ops.aten.maximum.default": 1})
             .check(["torch.ops.quantized_decomposed"])
-            .to_edge(config=self._edge_compile_config)
+            .to_edge()
             .partition()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
diff --git a/backends/arm/test/ops/test_minimum.py b/backends/arm/test/ops/test_minimum.py
@@ -12,7 +12,6 @@
 import torch
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
-from executorch.exir import EdgeCompileConfig
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from parameterized import parameterized
 
@@ -38,10 +37,6 @@ def __init__(self):
         def forward(self, x, y):
             return torch.minimum(x, y)
 
-    _edge_compile_config: EdgeCompileConfig = EdgeCompileConfig(
-        _skip_dim_order=True,  # TODO(T182928844): Delegate dim order op to backend.
-    )
-
     def _test_minimum_tosa_MI_pipeline(
         self, module: torch.nn.Module, test_data: Tuple[torch.Tensor]
     ):
@@ -54,7 +49,7 @@ def _test_minimum_tosa_MI_pipeline(
             .export()
             .check_count({"torch.ops.aten.minimum.default": 1})
             .check_not(["torch.ops.quantized_decomposed"])
-            .to_edge(config=self._edge_compile_config)
+            .to_edge()
             .partition()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
@@ -74,7 +69,7 @@ def _test_minimum_tosa_BI_pipeline(
             .export()
             .check_count({"torch.ops.aten.minimum.default": 1})
             .check(["torch.ops.quantized_decomposed"])
-            .to_edge(config=self._edge_compile_config)
+            .to_edge()
             .partition()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
diff --git a/backends/arm/test/ops/test_sum.py b/backends/arm/test/ops/test_sum.py
@@ -11,7 +11,6 @@
 import torch
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
-from executorch.exir import EdgeCompileConfig
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from parameterized import parameterized
 
@@ -50,10 +49,6 @@ class Sum(torch.nn.Module):
         def forward(self, x: torch.Tensor, dim: int, keepdim: bool):
             return x.sum(dim=dim, keepdim=keepdim)
 
-    _edge_compile_config: EdgeCompileConfig = EdgeCompileConfig(
-        _skip_dim_order=True,  # TODO(T182928844): Delegate dim order op to backend.
-    )
-
     def _test_sum_tosa_MI_pipeline(
         self, module: torch.nn.Module, test_data: tuple[exampledata_t]
     ):
@@ -66,7 +61,7 @@ def _test_sum_tosa_MI_pipeline(
             .export()
             .check_count({"torch.ops.aten.sum.dim_IntList": 1})
             .check_not(["torch.ops.quantized_decomposed"])
-            .to_edge(config=self._edge_compile_config)
+            .to_edge()
             .partition()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
@@ -86,7 +81,7 @@ def _test_sum_tosa_BI_pipeline(
             .export()
             .check_count({"torch.ops.aten.sum.dim_IntList": 1})
             .check(["torch.ops.quantized_decomposed"])
-            .to_edge(config=self._edge_compile_config)
+            .to_edge()
             .partition()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
diff --git a/backends/arm/test/tester/arm_tester.py b/backends/arm/test/tester/arm_tester.py
@@ -218,8 +218,6 @@ def to_edge(
             if config is not None:
                 to_edge_stage.edge_compile_conf = config
 
-        # TODO(T182928844): Delegate dim order op to backend.
-        to_edge_stage.edge_compile_conf._skip_dim_order = True
         return super().to_edge(to_edge_stage)
 
     def partition(self, partition_stage: Optional[Partition] = None):
@@ -245,7 +243,6 @@ def to_edge_transform_and_lower(
                 to_edge_and_lower_stage.partitioners = partitioners
             if edge_compile_config is not None:
                 to_edge_and_lower_stage.edge_compile_conf = edge_compile_config
-        to_edge_and_lower_stage.edge_compile_conf._skip_dim_order = True
         return super().to_edge_transform_and_lower(to_edge_and_lower_stage)
 
     def to_executorch(self, to_executorch_stage: Optional[ToExecutorch] | None = None):
diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py
@@ -511,15 +511,13 @@ def get_args():
             partitioner=[ArmPartitioner(compile_spec)],
             compile_config=EdgeCompileConfig(
                 _check_ir_validity=False,
-                _skip_dim_order=True,
             ),
         )
     else:
         edge = to_edge_transform_and_lower(
             exported_program,
             compile_config=EdgeCompileConfig(
                 _check_ir_validity=False,
-                _skip_dim_order=True,
             ),
         )
 

Original file line number	Diff line number	Diff line change
`@@ -12,7 +12,6 @@`
`12`	`12`	`from executorch.backends.arm.test import common, conftest`
`13`	`13`
`14`	`14`	`from executorch.backends.arm.test.tester.arm_tester import ArmTester`
`15`		`-from executorch.exir import EdgeCompileConfig`
`16`	`15`	`from torchvision import models, transforms`
`17`	`16`	`from torchvision.models.mobilenetv2 import MobileNet_V2_Weights`
`18`	`17`
`@@ -45,10 +44,6 @@ class TestMobileNetV2(unittest.TestCase):`
`45`	`44`	`"executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default",`
`46`	`45`	`}`
`47`	`46`
`48`		`- _edge_compile_config: EdgeCompileConfig = EdgeCompileConfig(`
`49`		`- _skip_dim_order=True, # TODO(T182928844): Delegate dim order op to backend.`
`50`		`- )`
`51`		`-`
`52`	`47`	`def test_mv2_tosa_MI(self):`
`53`	`48`	`(`
`54`	`49`	`ArmTester(`
`@@ -59,7 +54,7 @@ def test_mv2_tosa_MI(self):`
`59`	`54`	`),`
`60`	`55`	`)`
`61`	`56`	`.export()`
`62`		`- .to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)`
	`57`	`+ .to_edge_transform_and_lower()`
`63`	`58`	`.to_executorch()`
`64`	`59`	`.run_method_and_compare_outputs(inputs=self.model_inputs)`
`65`	`60`	`)`
`@@ -75,7 +70,7 @@ def test_mv2_tosa_BI(self):`
`75`	`70`	`)`
`76`	`71`	`.quantize()`
`77`	`72`	`.export()`
`78`		`- .to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)`
	`73`	`+ .to_edge_transform_and_lower()`
`79`	`74`	`.to_executorch()`
`80`	`75`	`# atol=1.0 is a defensive upper limit`
`81`	`76`	`# TODO MLETROCH-72`
`@@ -92,7 +87,7 @@ def test_mv2_u55_BI(self):`
`92`	`87`	`)`
`93`	`88`	`.quantize()`
`94`	`89`	`.export()`
`95`		`- .to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)`
	`90`	`+ .to_edge_transform_and_lower()`
`96`	`91`	`.to_executorch()`
`97`	`92`	`.serialize()`
`98`	`93`	`)`
`@@ -110,7 +105,7 @@ def test_mv2_u85_BI(self):`
`110`	`105`	`)`
`111`	`106`	`.quantize()`
`112`	`107`	`.export()`
`113`		`- .to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)`
	`108`	`+ .to_edge_transform_and_lower()`
`114`	`109`	`.to_executorch()`
`115`	`110`	`.serialize()`
`116`	`111`	`)`