
Commit 3599379

Qualcomm AI Engine Direct - Support min.dim, max.dim and argmax ops
Summary: Support aten.min.dim, aten.max.dim and aten.argmax.
1 parent 858102a commit 3599379
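
For orientation, these are the PyTorch patterns the QNN delegate can now consume. A minimal, hypothetical example module (not part of the commit; names and shapes are made up) exercising all three ops:

import torch

# Hypothetical example; shapes are illustrative only.
class MinMaxArgmaxExample(torch.nn.Module):
    def forward(self, x):
        max_val, max_idx = torch.max(x, dim=1)        # exported as aten.max.dim (+ aten.argmax for the index path)
        min_val, _ = torch.min(x, dim=1)              # exported as aten.min.dim
        best = torch.argmax(x, dim=-1, keepdim=True)  # exported as aten.argmax.default
        return max_val, max_idx, min_val, best

x = torch.randn(2, 8, 16)
print([t.shape for t in MinMaxArgmaxExample()(x)])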

14 files changed, +443 −3 lines


backends/qualcomm/_passes/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -17,6 +17,7 @@
 from .decompose_einsum import DecomposeEinsum
 from .decompose_expm1 import DecomposeExpM1
 from .decompose_linalg_vector_norm import DecomposeLinalgVectorNorm
+from .decompose_minmaxdim import DecomposeMinMaxDim
 from .decompose_roll import DecomposeRoll
 from .decompose_silu import DecomposeSilu
 from .decompose_wrap_with_autocast import DecomposeWrapWithAutocast
@@ -54,6 +55,7 @@
     DecomposeEinsum,
     DecomposeExpM1,
     DecomposeLinalgVectorNorm,
+    DecomposeMinMaxDim,
     DecomposeRoll,
     DecomposeSilu,
     DecomposeWrapWithAutocast,
backends/qualcomm/_passes/decompose_minmaxdim.py

Lines changed: 104 additions & 0 deletions (new file)

@@ -0,0 +1,104 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import operator
+from collections import Counter
+
+import torch
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass, PassResult
+from torch.fx.passes.utils.source_matcher_utils import get_source_partitions
+
+
+class DecomposeMinMaxDim(ExportPass):
+    """
+    Since QNN does not support multi-output ops, this pass decomposes
+    `torch.min(dim=...)` and `torch.max(dim=...)` into two separate operations:
+    - `aten.min.dim` / `aten.max.dim` for the value
+    - `aten.argmin` / `aten.argmax` for the index
+
+    Example transformation in the exported FX graph:
+
+    Python source:
+        val, idx = torch.min(x, dim=1)
+
+    Before:
+        %min = aten.min(%x, dim=1)
+        %val = getitem(%min, 0)
+        %idx = getitem(%min, 1)
+
+    After:
+        %min = aten.min(%x, dim=1)
+        %val = getitem(%min, 0)
+        %idx = aten.argmin(%x, dim=1)
+
+    This pass preserves the value output if used, and transforms only the index path.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.min_dim = exir_ops.edge.aten.min.dim
+        self.max_dim = exir_ops.edge.aten.max.dim
+        self.argmin = exir_ops.edge.aten.argmin.default
+        self.argmax = exir_ops.edge.aten.argmax.default
+        self.getitem = operator.getitem
+
+        # index-only op
+        self.replace_table = {
+            self.min_dim: self.argmin,
+            self.max_dim: self.argmax,
+        }
+
+        self.patterns = [
+            # Only index is used (e.g., _, idx = torch.min(x, dim=1))
+            {self.min_dim: 1, self.getitem: 1},
+            {self.max_dim: 1, self.getitem: 1},
+            # Both value and index are used (e.g., val, idx = torch.max(x, dim=1))
+            {self.min_dim: 1, self.getitem: 2},
+            {self.max_dim: 1, self.getitem: 2},
+        ]
+
+    def call(self, graph_module: torch.fx.GraphModule):
+        graph = graph_module.graph
+        partitions = get_source_partitions(graph, [torch.min, torch.max])
+        for _, src_partitions in partitions.items():
+            for partition in src_partitions:
+                multi_output_node = partition.nodes[0]
+                if Counter([n.target for n in partition.nodes]) not in self.patterns:
+                    continue
+                input_tensor = multi_output_node.args[0]
+                dim = multi_output_node.args[1]
+                keepdim = (
+                    multi_output_node.args[2]
+                    if len(multi_output_node.args) > 2
+                    else False
+                )
+
+                idx_node = next(
+                    (
+                        user
+                        for user in multi_output_node.users
+                        if user.meta["val"].dtype == torch.int64
+                    ),
+                    None,
+                )
+
+                if idx_node:
+                    with graph.inserting_before(idx_node):
+                        argmin_node = graph.create_node(
+                            "call_function",
+                            self.replace_table[multi_output_node.target],
+                            (input_tensor, dim, keepdim),
+                        )
+                        argmin_node.meta = idx_node.meta
+
+                    for user in list(idx_node.users):
+                        user.replace_input_with(idx_node, argmin_node)
+
+        graph.eliminate_dead_code()
+        graph_module.recompile()
+        return PassResult(graph_module, True)
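
The decomposition is sound because the index half of min.dim / max.dim can be recomputed independently with argmin / argmax. A quick eager-mode sanity check of that equivalence (plain PyTorch written for this note, not taken from the commit):

import torch

x = torch.randn(4, 7)  # random floats, so ties are practically impossible

val, idx = torch.max(x, dim=1, keepdim=False)

# Shape of the graph after DecomposeMinMaxDim: the value still comes from
# max.dim, while the index is produced by a separate single-output argmax.
val_only = torch.max(x, dim=1, keepdim=False).values
idx_only = torch.argmax(x, dim=1, keepdim=False)

assert torch.equal(val, val_only)
assert torch.equal(idx, idx_only)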

backends/qualcomm/_passes/i64_to_i32.py

Lines changed: 1 addition & 0 deletions

@@ -26,6 +26,7 @@ class I64toI32(ExportPass):
     """

     I64_OPS = {
+        exir_ops.edge.aten.argmax.default,
         exir_ops.edge.aten.argmin.default,
         exir_ops.edge.aten.arange.start_step,
         exir_ops.edge.aten.cumsum.default,

backends/qualcomm/_passes/layout_transform.py

Lines changed: 4 additions & 0 deletions

@@ -93,6 +93,8 @@ class LayoutTransform(ExportPass):
         exir_ops.edge.aten.lt.Tensor,
         exir_ops.edge.aten.maximum.default,
         exir_ops.edge.aten.mean.dim,
+        exir_ops.edge.aten.max.dim,
+        exir_ops.edge.aten.min.dim,
         exir_ops.edge.aten.minimum.default,
         exir_ops.edge.aten.mul.Tensor,
         exir_ops.edge.aten.ne.Scalar,
@@ -169,6 +171,8 @@ def is_layout_sensitive(self, node: torch.fx.Node) -> bool:
     def is_layout_agnostic(self, node: torch.fx.Node) -> bool:
         if node.target in [
             exir_ops.edge.aten.mean.dim,
+            exir_ops.edge.aten.max.dim,
+            exir_ops.edge.aten.min.dim,
             exir_ops.edge.aten.sum.dim_IntList,
         ]:
             # if dimension is not kept, we'll have no clue how to do layout transform

backends/qualcomm/_passes/qnn_pass_manager.py

Lines changed: 2 additions & 0 deletions

@@ -22,6 +22,7 @@
     DecomposeEinsum,
     DecomposeExpM1,
     DecomposeLinalgVectorNorm,
+    DecomposeMinMaxDim,
     DecomposeRoll,
     DecomposeSilu,
     DecomposeWrapWithAutocast,
@@ -84,6 +85,7 @@ def get_capture_program_passes():
         (ConvertConv1dToConv2d, True),
         (DecomposeAny, True),
         (DecomposeColIm, True),
+        (DecomposeMinMaxDim, True),
         (ExpandBroadcastTensorShape, False),
         (FixedLinearKeepDim, True),
         (FoldQDQ, True),

backends/qualcomm/builders/__init__.py

Lines changed: 6 additions & 0 deletions

@@ -12,6 +12,7 @@
     op_amax,
     op_and,
     op_arange,
+    op_argmax,
     op_argmin,
     op_atan,
     op_avg_pool2d,
@@ -54,9 +55,11 @@
     op_lt,
     op_matmul,
     op_max,
+    op_max_dim,
     op_max_pool2d,
     op_mean_dim,
     op_min,
+    op_min_dim,
     op_mul,
     op_ne,
     op_neg,
@@ -105,6 +108,7 @@
     op_amax,
     op_and,
     op_arange,
+    op_argmax,
     op_argmin,
     op_atan,
     op_avg_pool2d,
@@ -147,9 +151,11 @@
     op_lt,
     op_matmul,
     op_max,
+    op_max_dim,
     op_max_pool2d,
     op_mean_dim,
     op_min,
+    op_min_dim,
     op_mul,
     op_neg,
     op_ne,
backends/qualcomm/builders/op_argmax.py

Lines changed: 78 additions & 0 deletions (new file)

@@ -0,0 +1,78 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from typing import cast, Dict
+
+import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
+import numpy as np
+import torch
+from executorch.backends.qualcomm.utils.constants import QCOM_AXIS_ORDER, QCOM_DATA
+
+from .node_visitor import NodeVisitor
+from .node_visitor_manager import register_node_visitor
+from .qnn_constants import OpArgmax, QNN_OP_PACKAGE_NAME_QTI_AISW
+
+
+@register_node_visitor
+class argmax(NodeVisitor):
+    target = ["aten.argmax.default"]
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
+    ) -> PyQnnWrapper.PyQnnOpWrapper:
+        input_node = self.get_node(node.args[0])
+        input_tensor = self.get_tensor(input_node, node)
+        output_tensor = self.get_tensor(node, node)
+        argmax_inp_tensor_wrapper = self.define_tensor(
+            input_node,
+            node,
+            input_tensor,
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
+            nodes_to_wrappers,
+        )
+        argmax_input_tensors = [argmax_inp_tensor_wrapper]
+        argmax_out_tensor_wrapper = self.define_tensor(
+            node,
+            node,
+            output_tensor.to(torch.int32),
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
+            nodes_to_wrappers,
+        )
+        argmax_output_tensors = [argmax_out_tensor_wrapper]
+
+        dim = cast(int, node.args[1])
+        if dim < 0:
+            dim = dim % len(input_tensor.shape)
+        if QCOM_AXIS_ORDER in node.meta:
+            dim = node.meta[QCOM_AXIS_ORDER].index(dim)
+
+        argmax_op = PyQnnWrapper.PyQnnOpWrapper(
+            node.name,
+            QNN_OP_PACKAGE_NAME_QTI_AISW,
+            OpArgmax.op_name,
+        )
+        argmax_op.AddInputTensors(argmax_input_tensors)
+        argmax_op.AddOutputTensors(argmax_output_tensors)
+
+        argmax_op.AddScalarParam(
+            OpArgmax.param_axis,
+            PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_UINT_32,
+            {QCOM_DATA: np.uint32(dim)},
+        )
+
+        if len(node.args) > 2:
+            keep_dims = cast(bool, node.args[2])
+            argmax_op.AddScalarParam(
+                OpArgmax.param_keep_dims,
+                PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_BOOL_8,
+                {QCOM_DATA: keep_dims},
+            )
+
+        return argmax_op
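
The only non-obvious step in the builder above is the axis fix-up: a negative dim is normalized by the input rank, and if the layout-transform pass has permuted the tensor (recorded under QCOM_AXIS_ORDER in node.meta), the original dim is remapped to its position in the permuted axis order. A standalone sketch of that arithmetic with made-up values:

# Illustrative values only; mirrors the dim handling in op_argmax above.
rank = 4
dim = -3                          # user-facing dim on an NCHW tensor
dim = dim % rank                  # -> 1, the channel axis

axis_order = (0, 2, 3, 1)         # assumed NCHW -> NHWC permutation stored in QCOM_AXIS_ORDER
qnn_axis = axis_order.index(dim)  # channel now sits at axis 3 of the QNN tensor
print(qnn_axis)                   # prints 3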
backends/qualcomm/builders/op_max_dim.py

Lines changed: 89 additions & 0 deletions (new file)

@@ -0,0 +1,89 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import cast, Dict, List
+
+import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
+
+import numpy as np
+import torch
+from executorch.backends.qualcomm.utils.constants import QCOM_AXIS_ORDER, QCOM_DATA
+
+from .node_visitor import NodeVisitor
+from .node_visitor_manager import register_node_visitor
+from .qnn_constants import OpReduceMax, QNN_OP_PACKAGE_NAME_QTI_AISW
+
+
+@register_node_visitor
+class MaxDim(NodeVisitor):
+    target = ["aten.max.dim"]
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
+    ) -> List[PyQnnWrapper.PyQnnOpWrapper]:
+        input_node = self.get_node(node.args[0])
+        input_tensor = self.get_tensor(input_node, node)
+        input_tensor_wrapper = self.define_tensor(
+            input_node,
+            node,
+            input_tensor,
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
+            nodes_to_wrappers,
+        )
+
+        # QNN does not support multiple outputs for a single op.
+        # Since torch.max(input, dim) returns both values and indices,
+        # we only support the value output for OpReduceMax. The index output will be handled
+        # separately by OpArgmax.
+        # Therefore, we update node.meta["val"] to only keep the value part.
+        if len(node.meta["val"]) == 2:
+            node.meta["val"] = node.meta["val"][0]
+
+        output_tensor = self.get_tensor(node, node)
+        out_tensor_wrapper = self.define_tensor(
+            node,
+            node,
+            output_tensor,
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
+            nodes_to_wrappers,
+        )
+
+        dims = cast(List[int], [node.args[1]])
+        dims = [mean_dim % len(input_node.meta["val"].shape) for mean_dim in dims]
+        if QCOM_AXIS_ORDER in node.meta:
+            dims = [node.meta[QCOM_AXIS_ORDER].index(mean_dim) for mean_dim in dims]
+        dims_shape = [len(dims)]
+
+        reduce_max_op = PyQnnWrapper.PyQnnOpWrapper(
+            node.name,
+            QNN_OP_PACKAGE_NAME_QTI_AISW,
+            OpReduceMax.op_name,
+        )
+        reduce_max_op.AddInputTensors([input_tensor_wrapper])
+        reduce_max_op.AddOutputTensors([out_tensor_wrapper])
+
+        reduce_max_op.AddTensorParam(
+            OpReduceMax.param_axes,
+            PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_UINT_32,
+            len(dims_shape),
+            dims_shape,
+            np.array(dims, dtype=np.uint32),
+            True,
+        )
+        if len(node.args) > 2:
+            keep_dims = cast(bool, node.args[2])
+            reduce_max_op.AddScalarParam(
+                OpReduceMax.param_keep_dims,
+                PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_BOOL_8,
+                {QCOM_DATA: keep_dims},
+            )
+
+        return reduce_max_op

backends/qualcomm/builders/op_min.py

Lines changed: 1 addition & 1 deletion

@@ -16,7 +16,7 @@

 @register_node_visitor
 class Min(NodeVisitor):
-    target = ["aten.minimum.default", "aten.min.dim"]
+    target = ["aten.minimum.default"]

     def __init__(self, *args) -> None:
         super().__init__(*args)
