Commit b15b856

Commit message: up

1 parent 777f4d5 · commit b15b856

3 files changed: +51 −25 lines changed

.github/workflows/_unittest.yml

Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@ jobs:
   macos:
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:
-      runner: macos-m1-stable
+      runner: macos-15
       python-version: '3.11'
       submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}

backends/apple/coreml/compiler/torch_ops.py

Lines changed: 17 additions & 12 deletions
@@ -9,18 +9,19 @@
 # the op to the coremltools library.

 import torch as _torch
+from coremltools import _logger as logger
 from coremltools.converters.mil.frontend import _utils
 from coremltools.converters.mil.frontend.torch.ops import (
     _get_inputs,
-    NUM_TO_NUMPY_DTYPE,  # noqa: F401
+    NUM_TO_NUMPY_DTYPE,
+    NUM_TO_TORCH_DTYPE,
     transpose,
     unbind,
 )

 from coremltools.converters.mil.frontend.torch.torch_op_registry import (
     register_torch_op,
 )
-from coremltools.converters.mil.frontend.torch.utils import TORCH_DTYPE_TO_NUM
 from coremltools.converters.mil.mil import types

@@ -48,8 +49,14 @@ def dequantize_affine(context, node):
     zero_point = (
         inputs[3].val if inputs[3] is not None and inputs[3].val is not None else None
     )
-    # TODO: I'm not sure we need to worry about this b/c input gets cast to int4/int8
+    # I do not think we need to worry about input_dtype b/c it gets cast to int4/int8
+    # For now, we just check that it is int8 or int32
     input_dtype = inputs[4].val  # noqa: F841
+    assert NUM_TO_TORCH_DTYPE[input_dtype] in [
+        _torch.int8,
+        _torch.int32,
+    ], "input_dtype should be int8 or int32"
+
     quant_min = inputs[5].val
     quant_max = inputs[6].val

@@ -67,17 +74,15 @@ def dequantize_affine(context, node):
     if zero_point is not None:
         zero_point = zero_point.reshape(-1, scales_per_row)

-    # TODO: I don't know if CoreML can make use of this. I guess we could add a cast op
-    # to the output, but I'm pretty sure CoreML removes casts during one of its passes
+    # TODO: I don't know if CoreML can make use of this
+    # We could add a cast op to the output, but I'm pretty sure CoreML will remove this during a later pass
+    # For now, we just log a warning
     out_np_dtype = None
     if len(inputs) > 7:
-        output_dtype = inputs[7].val
-        assert isinstance(
-            output_dtype, _torch.dtype
-        ), f"output_dtype must be a torch.dtype, but got type {type(output_dtype)}"
-        out_np_dtype = NUM_TO_NUMPY_DTYPE[  # noqa: F841
-            TORCH_DTYPE_TO_NUM[output_dtype]
-        ]
+        out_np_dtype = NUM_TO_NUMPY_DTYPE[inputs[7].val]
+        logger.warning(
+            f"Core ML ignores output_dtype {out_np_dtype} on torchao.dequantize_affine and instead uses the native precision."
+        )

     if quant_min == -8 and quant_max == 7:
         quantized_np_dtype = types.nptype_from_builtin(types.string_to_builtin("int4"))
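
The net effect of these two hunks: the handler now validates the serialized input dtype code via NUM_TO_TORCH_DTYPE, and it downgrades the old output_dtype assertion to a warning. A minimal standalone sketch of that guard-and-warn pattern (the numeric dtype codes and the plain logging setup below are illustrative assumptions, not the coremltools internals):

import logging

import torch as _torch

logger = logging.getLogger(__name__)

# Illustrative subset of the code -> dtype mapping the real handler imports
# from coremltools as NUM_TO_TORCH_DTYPE; the codes below follow torch's
# ScalarType numbering but are an assumption here.
NUM_TO_TORCH_DTYPE = {1: _torch.int8, 3: _torch.int32, 4: _torch.int64}


def check_dequantize_affine_dtypes(input_dtype_code, out_np_dtype=None):
    # The input must decode to int8 or int32, as the new assert requires
    # (.get avoids a KeyError for unknown codes; None also fails the assert).
    assert NUM_TO_TORCH_DTYPE.get(input_dtype_code) in [
        _torch.int8,
        _torch.int32,
    ], "input_dtype should be int8 or int32"
    if out_np_dtype is not None:
        # output_dtype is ignored rather than enforced: warn, don't fail.
        logger.warning(
            f"Core ML ignores output_dtype {out_np_dtype} on "
            "torchao.dequantize_affine and instead uses the native precision."
        )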

backends/apple/coreml/test/test_torch_ops.py

Lines changed: 33 additions & 12 deletions
@@ -2,7 +2,6 @@
 #
 # Please refer to the license found in the LICENSE file in the root directory of the source tree.

-import copy
 import sys
 import unittest

@@ -15,7 +14,7 @@
 from executorch.backends.apple.coreml.compiler import CoreMLBackend
 from executorch.backends.apple.coreml.partition import CoreMLPartitioner
 from executorch.runtime import Runtime
-from torchao.quantization import quantize_, PerGroup, PerAxis, IntxWeightOnlyConfig
+from torchao.quantization import IntxWeightOnlyConfig, PerAxis, PerGroup, quantize_

 _TEST_RUNTIME = sys.platform == "darwin"

@@ -30,10 +29,12 @@ def _coreml_partitioner(self):
         return CoreMLPartitioner(compile_specs=compile_specs)

     def _get_test_model(self):
-        model = torch.nn.Sequential(torch.nn.Embedding(64, 128), torch.nn.Linear(128, 128), torch.nn.ReLU())
+        model = torch.nn.Sequential(
+            torch.nn.Embedding(64, 128), torch.nn.Linear(128, 128), torch.nn.ReLU()
+        )
         example_inputs = (torch.LongTensor([0]),)
         return model, example_inputs
-
+
     def _compare_outputs(self, executorch_program, eager_program, example_inputs):
         if not _TEST_RUNTIME:
             return
@@ -45,10 +46,14 @@ def _compare_outputs(self, executorch_program, eager_program, example_inputs):
         self.assertTrue(
             torch.allclose(et_outputs, eager_outputs, atol=1e-02, rtol=1e-02)
         )
-
+
     def test_dequantize_affine_b4w_embedding(self):
         model, example_inputs = self._get_test_model()
-        quantize_(model, IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerGroup(32)), lambda m, fqn: isinstance(m, torch.nn.Embedding))
+        quantize_(
+            model,
+            IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerGroup(32)),
+            lambda m, fqn: isinstance(m, torch.nn.Embedding),
+        )
         ep = torch.export.export(model, example_inputs)
         delegated_program = executorch.exir.to_edge_transform_and_lower(
             ep,
@@ -65,7 +70,10 @@ def test_dequantize_affine_b4w_embedding(self):
 
     def test_dequantize_affine_b4w_linear(self):
         model, example_inputs = self._get_test_model()
-        quantize_(model, IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerGroup(32)))
+        quantize_(
+            model,
+            IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerGroup(32)),
+        )
         ep = torch.export.export(model, example_inputs)
         delegated_program = executorch.exir.to_edge_transform_and_lower(
             ep,
@@ -82,7 +90,11 @@ def test_dequantize_affine_b4w_linear(self):
 
     def test_dequantize_affine_c4w_embedding(self):
         model, example_inputs = self._get_test_model()
-        quantize_(model, IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerAxis(0)), lambda m, fqn: isinstance(m, torch.nn.Embedding))
+        quantize_(
+            model,
+            IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerAxis(0)),
+            lambda m, fqn: isinstance(m, torch.nn.Embedding),
+        )
         ep = torch.export.export(model, example_inputs)
         delegated_program = executorch.exir.to_edge_transform_and_lower(
             ep,
@@ -99,7 +111,9 @@ def test_dequantize_affine_c4w_embedding(self):
 
     def test_dequantize_affine_c4w_linear(self):
         model, example_inputs = self._get_test_model()
-        quantize_(model, IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerAxis(0)))
+        quantize_(
+            model, IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerAxis(0))
+        )
         ep = torch.export.export(model, example_inputs)
         delegated_program = executorch.exir.to_edge_transform_and_lower(
             ep,
@@ -113,11 +127,18 @@ def test_dequantize_affine_c4w_linear(self):
         ], f"Got unexpected node target after delegation: {node.target.__name__}"
         et_prog = delegated_program.to_executorch()
         self._compare_outputs(et_prog, model, example_inputs)
-
+
     def test_dequantize_affine_c8w_embedding_b4w_linear(self):
         model, example_inputs = self._get_test_model()
-        quantize_(model, IntxWeightOnlyConfig(weight_dtype=torch.int8, granularity=PerAxis(0)), lambda m, fqn: isinstance(m, torch.nn.Embedding))
-        quantize_(model, IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerGroup(32)))
+        quantize_(
+            model,
+            IntxWeightOnlyConfig(weight_dtype=torch.int8, granularity=PerAxis(0)),
+            lambda m, fqn: isinstance(m, torch.nn.Embedding),
+        )
+        quantize_(
+            model,
+            IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerGroup(32)),
+        )
         ep = torch.export.export(model, example_inputs)
         delegated_program = executorch.exir.to_edge_transform_and_lower(
             ep,
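
Taken together, these tests exercise the full torchao → ExecuTorch → Core ML path. A condensed sketch of that flow, assuming torchao and an ExecuTorch build with the Core ML backend are installed (the partitioner here is constructed without the compile specs the tests pass in):

import executorch.exir
import torch
from executorch.backends.apple.coreml.partition import CoreMLPartitioner
from torchao.quantization import IntxWeightOnlyConfig, PerGroup, quantize_

# Same toy model as the tests: embedding -> linear -> relu.
model = torch.nn.Sequential(
    torch.nn.Embedding(64, 128), torch.nn.Linear(128, 128), torch.nn.ReLU()
)
example_inputs = (torch.LongTensor([0]),)

# 4-bit weight-only quantization with group size 32; quantize_'s default
# filter applies this to the linear layers only.
quantize_(
    model, IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerGroup(32))
)

# Export and lower; the resulting dequantize_affine nodes are delegated
# to Core ML via the handler patched in this commit.
ep = torch.export.export(model, example_inputs)
delegated_program = executorch.exir.to_edge_transform_and_lower(
    ep, partitioner=[CoreMLPartitioner()]
)
et_prog = delegated_program.to_executorch()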
