Skip to content

Commit b7eff08

Browse files
authored
Arm backend: Add sub tensor to match scalar tensors to input data types. (pytorch#12541)
Updates the existing pass to ensure that x1 and x2 are of the same type. This addresses an issue where scalars are not automatically cast to the input dtype, unlike the mul and add operators.
1 parent efd133d commit b7eff08

File tree

5 files changed

+19
-25
lines changed

5 files changed

+19
-25
lines changed

backends/arm/_passes/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,8 @@
6767
)
6868
from .insert_rescales_pass import InsertRescalePass # noqa
6969
from .insert_table_ops import InsertTableOpsPass # noqa
70+
from .match_arg_dtype_pass import MatchArgDtypePass # noqa
7071
from .match_arg_ranks_pass import MatchArgRanksPass # noqa
71-
from .match_where_self_arg_dtype_pass import MatchWhereSelfDtypePass # noqa
7272
from .mm_to_bmm_pass import ConvertMmToBmmPass # noqa
7373
from .remove_clone_pass import RemoveClonePass # noqa
7474
from .replace_scalar_with_tensor_pass import ( # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@
6666
InsertCastForOpsWithInt64InputPass,
6767
InsertRescalePass,
6868
InsertTableOpsPass,
69+
MatchArgDtypePass,
6970
MatchArgRanksPass,
70-
MatchWhereSelfDtypePass,
7171
QuantizeOperatorArguments,
7272
RemoveClonePass,
7373
ReplaceInfValues,
@@ -116,7 +116,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
116116
self.add_pass(ConvertToClampPass())
117117
self.add_pass(ConvertMinMaxPass())
118118
self.add_pass(ConvertAnyDefaultDimDimsPass())
119-
self.add_pass(MatchWhereSelfDtypePass())
119+
self.add_pass(MatchArgDtypePass())
120120
if self.tosa_spec.is_U55_subset:
121121
self.add_pass(CastToInt32Pass())
122122

@@ -193,8 +193,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
193193
self.add_pass(ConvertToClampPass())
194194
self.add_pass(ConvertMinMaxPass())
195195
self.add_pass(ConvertAnyDefaultDimDimsPass())
196-
self.add_pass(MatchWhereSelfDtypePass())
197-
196+
self.add_pass(MatchArgDtypePass())
198197
self.add_pass(AnnotateDecomposedMatmulPass())
199198
self.add_pass(QuantizeOperatorArguments())
200199
self.add_pass(FoldAndAnnotateQParamsPass(exported_program)) # type: ignore[call-arg]

backends/arm/_passes/match_where_self_arg_dtype_pass.py renamed to backends/arm/_passes/match_arg_dtype_pass.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# LICENSE file in the root directory of this source tree.
55

66
import torch
7-
from executorch.backends.arm._passes.arm_pass_utils import create_node
7+
from executorch.backends.arm._passes.arm_pass_utils import create_node, get_node_arg
88
from executorch.exir.dialects._ops import ops as exir_ops
99
from executorch.exir.pass_base import ExportPass, PassResult
1010

@@ -26,7 +26,7 @@ def get_largest_dtype(dtype_1, dtype_2):
2626
return dtype_1 if DTYPE_RANK[dtype_1] > DTYPE_RANK[dtype_2] else dtype_2
2727

2828

29-
class MatchWhereSelfDtypePass(ExportPass):
29+
class MatchArgDtypePass(ExportPass):
3030
"""Pass to match data types of non-condition input tensors.
3131
3232
Edge dialect allows different data types for non-condition tensors, while TOSA
@@ -38,14 +38,18 @@ class MatchWhereSelfDtypePass(ExportPass):
3838
3939
"""
4040

41+
targeted_ops = {exir_ops.edge.aten.sub.Tensor, exir_ops.edge.aten.where.self}
42+
4143
def call(self, graph_module: torch.fx.GraphModule):
4244
modified_graph = False
4345
graph = graph_module.graph
44-
node_list = graph.find_nodes(
45-
op="call_function", target=exir_ops.edge.aten.where.self
46-
)
47-
for node in node_list:
48-
cond, input_, other_ = node.args
46+
47+
for node in list(graph.nodes):
48+
if node.op != "call_function" or node.target not in self.targeted_ops:
49+
continue
50+
51+
input_ = get_node_arg(node.args, 0)
52+
other_ = get_node_arg(node.args, 1)
4953

5054
input_dtype = input_.meta["val"].dtype
5155
other_dtype = other_.meta["val"].dtype

backends/arm/test/ops/test_scalars.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -242,21 +242,16 @@ def test_add_scalar_u85_BI():
242242

243243

244244
# SUB MI ------------------------------------------------------
245-
mi_sub_xfails = {
246-
"int_r1_ts": "TypeError: All IO needs to have the same data type, got input 1: 8, input 2: 6 and output: 8",
247-
"int_r4_ts": "TypeError: All IO needs to have the same data type, got input 1: 8, input 2: 6 and output: 8",
248-
**xfails,
249-
}
250245

251246

252-
@common.parametrize("test_data", tensor_scalar_tests, xfails=mi_sub_xfails)
247+
@common.parametrize("test_data", tensor_scalar_tests, xfails=xfails)
253248
def test_sub_tensor_tosa_MI_scalar(test_data):
254249
"""Tests regular sub with one scalar input."""
255250
pipeline = TosaPipelineMI[input_t1](Sub(), test_data, aten_op=Sub.aten_op)
256251
pipeline.run()
257252

258253

259-
@common.parametrize("test_data", tensor_scalar_tests, xfails=mi_sub_xfails)
254+
@common.parametrize("test_data", tensor_scalar_tests, xfails=xfails)
260255
def test_sub_tensor_tosa_MI_inplace(test_data):
261256
"""Tests inplace sub with one scalar input."""
262257
pipeline = TosaPipelineMI[input_t1](SubInplace(), test_data, aten_op=[])

backends/arm/test/ops/test_sub.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@
4242
torch.randn(1, 4, 4, 1),
4343
torch.randn(1, 1, 4, 4),
4444
),
45+
"rand_3d_rand_Scalar": lambda: (torch.rand(1, 6, 2), torch.rand(1)),
46+
"rand_3d_Scalar": lambda: (torch.rand(1, 6, 2), 1),
4547
}
4648
fvp_sub2_xfails = {"rand_4D_2x2x4x4": "MLETORCH-517 : Multiple batches not supported"}
4749

@@ -93,7 +95,6 @@ def test_sub_tensor_tosa_BI(test_data):
9395
aten_op,
9496
exir_op,
9597
)
96-
pipeline.change_args("run_method_and_compare_outputs", qtol=1)
9798
pipeline.run()
9899

99100

@@ -106,7 +107,6 @@ def test_sub_tensor_tosa_BI_2(test_data: Tuple[torch.Tensor, torch.Tensor]):
106107
aten_op,
107108
exir_op,
108109
)
109-
pipeline.change_args("run_method_and_compare_outputs", qtol=1)
110110
pipeline.run()
111111

112112

@@ -121,7 +121,6 @@ def test_sub_tensor_u55_BI(test_data):
121121
exir_op,
122122
run_on_fvp=True,
123123
)
124-
pipeline.change_args("run_method_and_compare_outputs", qtol=1)
125124
pipeline.run()
126125

127126

@@ -136,7 +135,6 @@ def test_sub_tensor_u55_BI_2(test_data: Tuple[torch.Tensor, torch.Tensor]):
136135
exir_op,
137136
run_on_fvp=True,
138137
)
139-
pipeline.change_args("run_method_and_compare_outputs", qtol=1)
140138
pipeline.run()
141139

142140

@@ -151,7 +149,6 @@ def test_sub_tensor_u85_BI_2(test_data):
151149
exir_op,
152150
run_on_fvp=True,
153151
)
154-
pipeline.change_args("run_method_and_compare_outputs", qtol=1)
155152
pipeline.run()
156153

157154

@@ -166,5 +163,4 @@ def test_sub_tensor_u85_BI(test_data: Tuple[torch.Tensor, torch.Tensor]):
166163
exir_op,
167164
run_on_fvp=True,
168165
)
169-
pipeline.change_args("run_method_and_compare_outputs", qtol=1)
170166
pipeline.run()

0 commit comments

Comments
 (0)