pytorch
diff --git a/‎.github/workflows/android-perf.yml
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/android-perf.yml
Lines changed: 2 additions & 2 deletions
diff --git a/‎.github/workflows/apple-perf.yml
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/apple-perf.yml
Lines changed: 2 additions & 2 deletions
diff --git a/‎.github/workflows/trunk.yml
Lines changed: 2 additions & 3 deletions b/‎.github/workflows/trunk.yml
Lines changed: 2 additions & 3 deletions
diff --git a/‎backends/arm/_passes/__init__.py
Lines changed: 1 addition & 0 deletions b/‎backends/arm/_passes/__init__.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/arm/_passes/annotate_channels_last_dim_order_pass.py
Lines changed: 2 additions & 24 deletions b/‎backends/arm/_passes/annotate_channels_last_dim_order_pass.py
Lines changed: 2 additions & 24 deletions
diff --git a/‎backends/arm/_passes/annotate_decomposed_matmul.py
Lines changed: 9 additions & 14 deletions b/‎backends/arm/_passes/annotate_decomposed_matmul.py
Lines changed: 9 additions & 14 deletions
diff --git a/‎backends/arm/_passes/arm_pass_manager.py
Lines changed: 6 additions & 3 deletions b/‎backends/arm/_passes/arm_pass_manager.py
Lines changed: 6 additions & 3 deletions
diff --git a/‎backends/arm/_passes/cast_int64_pass.py
Lines changed: 2 additions & 0 deletions b/‎backends/arm/_passes/cast_int64_pass.py
Lines changed: 2 additions & 0 deletions
diff --git a/‎backends/arm/_passes/decompose_linalg_vector_norm_pass.py
Lines changed: 5 additions & 3 deletions b/‎backends/arm/_passes/decompose_linalg_vector_norm_pass.py
Lines changed: 5 additions & 3 deletions
diff --git a/‎backends/arm/_passes/decompose_linear_pass.py
Lines changed: 5 additions & 21 deletions b/‎backends/arm/_passes/decompose_linear_pass.py
Lines changed: 5 additions & 21 deletions
@@ -342,8 +342,8 @@ jobs:
               git clone https://github.com/huggingface/optimum-executorch
               pushd optimum-executorch
               # There is no release yet, for CI stability, always test from the same commit on main
-              git checkout 1c653dc49812fc431a22312c7295d97005d22e12
-              python install_dev.py
+              git checkout 4c3b18f6cca68c5ccff809131d570062723d7188
+              python install_dev.py --skip_override_torch
               pip list
 
               ARGS=(
 
@@ -347,8 +347,8 @@ jobs:
             git clone https://github.com/huggingface/optimum-executorch
             pushd optimum-executorch
             # There is no release yet, for CI stability, always test from the same commit on main
-            git checkout 1c653dc49812fc431a22312c7295d97005d22e12
-            ${CONDA_RUN} python install_dev.py
+            git checkout 4c3b18f6cca68c5ccff809131d570062723d7188
+            ${CONDA_RUN} python install_dev.py --skip_override_torch
             pip list
 
             ARGS=(
 
@@ -571,9 +571,8 @@ jobs:
         git clone https://github.com/huggingface/optimum-executorch
         pushd optimum-executorch
         # There is no release yet, for CI stability, always test from the same commit on main
-        git checkout 1c653dc49812fc431a22312c7295d97005d22e12
-        pip install .[tests]
-        pip install transformers==4.52.4
+        git checkout 4c3b18f6cca68c5ccff809131d570062723d7188
+        python install_dev.py --skip_override_torch
         popd
         pip list
         echo "::endgroup::"
 
@@ -29,6 +29,7 @@
 from .decompose_leaky_relu_pass import DecomposeLeakyReLUPass  # noqa
 from .decompose_linalg_vector_norm_pass import DecomposeLinearVectorNormPass  # noqa
 from .decompose_linear_pass import DecomposeLinearPass  # noqa
+from .decompose_maxpool2d_with_dilation import DecomposeMaxPool2DPass  # noqa
 from .decompose_meandim_pass import DecomposeMeanDimPass  # noqa
 from .decompose_ne_pass import DecomposeNotEqualPass  # noqa
 from .decompose_select import DecomposeSelectPass  # noqa
 
@@ -5,15 +5,12 @@
 
 # pyre-unsafe
 
-from typing import cast
 
 import torch
 from executorch.backends.arm._passes.arm_pass_utils import (
     create_node,
     get_first_fake_tensor,
-    insert_q_dq_pair,
 )
-from executorch.backends.arm.tosa_quant_utils import dq_op, q_op
 from executorch.backends.arm.tosa_utils import is_consumer_node_depthwise_conv2d
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, PassResult
@@ -59,20 +56,10 @@ class AnnotateChannelsLastDimOrder(ExportPass):
 
     def is_weight_node_for_depthwise_conv2d(self, node: torch.fx.Node):
         """
-        returns True for dq and w in the following sequences;
+        returns True for w in the following sequence;
         w -> depthwise_conv2d -> ...
-        w -> dq -> depthwise_conv2d -> ...
         """
-        if node.op == "call_function":
-            if node.target != dq_op:
-                return False
-            prev_node = node.args[0]
-            if cast(torch.fx.Node, prev_node).op != "placeholder":
-                return False
-            if is_consumer_node_depthwise_conv2d(node):
-                consumer_node = list(node.users)[0]
-                return consumer_node.args[1] == node
-        elif node.op == "placeholder":
+        if node.op == "placeholder":
             # node is an input, weight or bias node
             consumer_node = list(node.users)[0]
             if self.is_weight_node_for_depthwise_conv2d(consumer_node):
@@ -129,8 +116,6 @@ def is_channel_reshape(input_shape, output_shape):
 
     @staticmethod
     def insert_input_transpose(node, input_node, graph_module):
-        quantize = input_node.target == dq_op
-        q_params = input_node.args[1:] if quantize else None
         with graph_module.graph.inserting_before(node):
             permute_node = create_node(
                 graph_module.graph,
@@ -143,8 +128,6 @@ def insert_input_transpose(node, input_node, graph_module):
                         else AnnotateChannelsLastDimOrder.NHWC_inverse_order
                     ),
                 ),
-                quantize=quantize,
-                q_params=q_params,
             )
             node.replace_input_with(input_node, permute_node)
 
@@ -185,11 +168,6 @@ def insert_output_transpose(node, graph_module):
             for user in users:
                 user.replace_input_with(node, permute_node)
 
-            quantize = node.args[0] == q_op
-            if quantize:
-                q_params = node.args[0].args[1:]
-                insert_q_dq_pair(graph_module.graph, node, q_params)
-
     @staticmethod
     def _insert_view_transpose(
         input_shape, output_shape, node, input_node, graph_module
 
@@ -7,13 +7,14 @@
 
 import itertools
 import operator
-from typing import List
+from typing import cast, List
 
 import torch
 from executorch.backends.arm._passes.arm_pass_utils import create_node
 
-from executorch.backends.arm.tosa_quant_utils import dq_op, q_op, QuantArgs
+from executorch.backends.arm.tosa_quant_utils import dq_ops, q_ops
 from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.dialects.edge._ops import EdgeOpOverload
 from executorch.exir.pass_base import ExportPass, PassResult
 from torch.fx import GraphModule
 from torch.fx.passes.utils.source_matcher_utils import get_source_partitions
@@ -61,7 +62,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
         }
         for partition in matmul_partitions:
             quantized_input = all(
-                input_node.target == dq_op for input_node in partition.input_nodes
+                input_node.target in dq_ops for input_node in partition.input_nodes
             )
             matmul_node = [
                 node for node in partition.nodes if node.target in matmul_targets
@@ -74,17 +75,14 @@ def call(self, graph_module: GraphModule) -> PassResult:
                     input_node = self._match_partition_to_node(
                         node, partition.input_nodes
                     )
-                    input_node_qargs = QuantArgs.from_operator(
-                        input_node.target, input_node.args
-                    )
                     # Insert new dq-node just before the mm/bmm with input_node's qparams
                     with graph_module.graph.inserting_before(matmul_node):
                         # Create new dq-node before matmul
                         dq_node = create_node(
                             graph=graph_module.graph,
-                            op_target=dq_op,
+                            op_target=cast(EdgeOpOverload, input_node.target),  # type: ignore[arg-type]
                         )
-                        dq_node.args = (node, *input_node_qargs)
+                        dq_node.args = (node, *input_node.args[1:])
                         matmul_node.replace_input_with(node, dq_node)
 
                 for partition_input in partition.input_nodes:
@@ -95,19 +93,16 @@ def call(self, graph_module: GraphModule) -> PassResult:
                     graph_module.graph.erase_node(partition_input)
 
             partition_output = list(partition.output_nodes[0].users)[0]
-            quantized_output = partition_output.target == q_op
+            quantized_output = partition_output.target in q_ops
             if quantized_output:
-                output_node_qargs = QuantArgs.from_operator(
-                    partition_output.target, partition_output.args
-                )
                 with graph_module.graph.inserting_after(matmul_node):
                     # Create q-node after matmul
                     q_node = create_node(
                         graph=graph_module.graph,
-                        op_target=q_op,
+                        op_target=cast(EdgeOpOverload, partition_output.target),  # type: ignore[arg-type]
                     )
                     matmul_node.replace_all_uses_with(q_node)
-                    q_node.args = (matmul_node, *output_node_qargs)
+                    q_node.args = (matmul_node, *partition_output.args[1:])
                 # Remove partition output q-node
                 partition_output.replace_all_uses_with(
                     partition_output.all_input_nodes[0]
 
@@ -32,6 +32,7 @@
     DecomposeLeakyReLUPass,
     DecomposeLinearPass,
     DecomposeLinearVectorNormPass,
+    DecomposeMaxPool2DPass,
     DecomposeMeanDimPass,
     DecomposeNotEqualPass,
     DecomposeSelectPass,
@@ -92,7 +93,6 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(RemoveGetItemPass())
         self.add_pass(ConvertSplitToSlicePass())
         self.add_pass(ConvertMmToBmmPass())
-        self.add_pass(DecomposeLinearPass())
         self.add_pass(DecomposeLinearVectorNormPass())
         self.add_pass(
             DecomposeMeanDimPass(exported_program.graph_module, self.tosa_spec)
@@ -108,12 +108,13 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(ReplaceScalarWithTensorArgPassTOSABI())
         self.add_pass(AnnotateDecomposedMatmulPass())
         self.add_pass(QuantizeOperatorArguments())
-        self.add_pass(FoldAndAnnotateQParamsPass())  # type: ignore[call-arg]
+        self.add_pass(FoldAndAnnotateQParamsPass(exported_program))  # type: ignore[call-arg]
         self.add_pass(RetraceFoldedDtypesPass())
         self.add_pass(UnsqueezeScalarPlaceholdersPass(exported_program))
         self.add_pass(MatchArgRanksPass(exported_program))
         if self.tosa_spec.is_U55_subset:
             self.add_pass(BroadcastArgsPass())
+        self.add_pass(DecomposeLinearPass())
         self.add_pass(ComputeConstantOpsAOT(exported_program))
 
         self.add_pass(RemoveClonePass())
@@ -123,6 +124,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(CastInt64BuffersToInt32Pass(exported_program))
         self.add_pass(DecomposeSumPass())
         self.add_pass(Conv1dUnsqueezePass())
+        self.add_pass(DecomposeMaxPool2DPass())
         self.add_pass(DecomposeSelectPass())
         self.add_pass(ConvertSqueezesToViewPass())
 
@@ -166,7 +168,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
 
         self.add_pass(AnnotateDecomposedMatmulPass())
         self.add_pass(QuantizeOperatorArguments())
-        self.add_pass(FoldAndAnnotateQParamsPass())  # type: ignore[call-arg]
+        self.add_pass(FoldAndAnnotateQParamsPass(exported_program))  # type: ignore[call-arg]
         self.add_pass(RetraceFoldedDtypesPass())
         self.add_pass(UnsqueezeScalarPlaceholdersPass(exported_program))
         self.add_pass(MatchArgRanksPass(exported_program))
@@ -179,6 +181,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(CastInt64BuffersToInt32Pass(exported_program))
         self.add_pass(DecomposeSumPass())
         self.add_pass(Conv1dUnsqueezePass())
+        self.add_pass(DecomposeMaxPool2DPass())
         self.add_pass(DecomposeSelectPass())
         self.add_pass(ConvertSqueezesToViewPass())
 
 
@@ -35,6 +35,8 @@ def _assert_within_int32(self, tensor: torch.Tensor, node: torch.fx.Node):
 
     def _to_int32(self, graph_module: torch.fx.GraphModule):
         for node in graph_module.graph.nodes:
+            if len(node.users) == 0:
+                continue
             fake_tensor = node.meta["val"]
             if not isinstance(fake_tensor, torch._subclasses.fake_tensor.FakeTensor):
                 continue
 
@@ -51,10 +51,12 @@ def call_operator(self, op, args, kwargs, meta):
                 f"is not supported for linalg_vector_norm operator"
             )
 
+        # Sum over all dimensions if dim is None
         if norm_dim is None:
-            raise ValueError("The norm_dim for linalg_vector_norm is None.")
-
-        dims = [norm_dim] if isinstance(norm_dim, int) else list(norm_dim)
+            rank = input_tensor.data.dim()
+            dims = list(range(rank))
+        else:
+            dims = [norm_dim] if isinstance(norm_dim, int) else list(norm_dim)
 
         # Decomposition based on norm order.
         if norm_order == 1:
 
@@ -1,30 +1,28 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
-# All rights reserved.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
 # pyre-unsafe
 
 import numpy as np
+from executorch.backends.arm._passes import ArmPass
 from executorch.backends.arm._passes.arm_pass_utils import (
     create_node,
     get_first_fake_tensor,
 )
-from executorch.backends.arm.tosa_quant_utils import dq_op, q_op
 from executorch.exir.dialects._ops import ops as exir_ops
-from executorch.exir.pass_base import ExportPass, PassResult
+from executorch.exir.pass_base import PassResult
 
 
-class DecomposeLinearPass(ExportPass):
+class DecomposeLinearPass(ArmPass):
     """
     This pass decomposes linear into a Conv2D with the required view operations.
     linear(x, weights, bias) becomes:
         x_reshaped       = view(x)
         weights_reshaped = view(weights)
         conv2d           = conv2d(x_reshaped, weights_reshaped, bias)
         output           = view(conv2d)
-    It also inserts q/dq pairs if the linear node was quantized.
     """
 
     def call(self, graph_module):
@@ -47,35 +45,22 @@ def call(self, graph_module):
             weights_reshaped_shape = [weights_shape[0], weights_shape[1], 1, 1]
 
             with graph_module.graph.inserting_before(node):
-                quantize = input.op == "call_function" and input.target == dq_op
-                q_params = input.args[1:] if quantize else None
                 # Reshape input to 4D with shape (N, Ci, 1, 1)
                 input_reshaped = create_node(
                     graph=graph_module.graph,
                     op_target=exir_ops.edge.aten.view_copy.default,
                     args=(input, input_reshaped_shape),
                     kwargs={},
-                    quantize=quantize,
-                    q_params=q_params,
                 )
 
-                quantize = weights.op == "call_function" and weights.target == dq_op
-                q_params = weights.args[1:] if quantize else None
                 # Reshape weights to 4D with shape (Co, Ci, 1, 1)
                 weights_reshaped = create_node(
                     graph=graph_module.graph,
                     op_target=exir_ops.edge.aten.view_copy.default,
                     args=(weights, weights_reshaped_shape),
                     kwargs={},
-                    quantize=quantize,
-                    q_params=q_params,
                 )
 
-                consumer_node = list(node.users)[0]
-                quantize = (
-                    consumer_node.op == "call_function" and consumer_node.target == q_op
-                )
-                q_params = consumer_node.args[1:] if quantize else None
                 conv = create_node(
                     graph=graph_module.graph,
                     op_target=exir_ops.edge.aten.convolution.default,
@@ -91,8 +76,7 @@ def call(self, graph_module):
                         1,  # groups
                     ),
                     kwargs={},
-                    quantize=quantize,
-                    q_params=q_params,
+                    from_node=node,
                 )
 
             with graph_module.graph.inserting_after(conv):