Commit b15b856

Commit message: up

1 parent 777f4d5 · commit b15b856

3 files changed: +51 −25 lines changed

.github/workflows/_unittest.yml

Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@ jobs:
   macos:
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:
-      runner: macos-m1-stable
+      runner: macos-15
       python-version: '3.11'
       submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}

backends/apple/coreml/compiler/torch_ops.py

Lines changed: 17 additions & 12 deletions
@@ -9,18 +9,19 @@
 # the op to the coremltools library.

 import torch as _torch
+from coremltools import _logger as logger
 from coremltools.converters.mil.frontend import _utils
 from coremltools.converters.mil.frontend.torch.ops import (
     _get_inputs,
-    NUM_TO_NUMPY_DTYPE,  # noqa: F401
+    NUM_TO_NUMPY_DTYPE,
+    NUM_TO_TORCH_DTYPE,
     transpose,
     unbind,
 )

 from coremltools.converters.mil.frontend.torch.torch_op_registry import (
     register_torch_op,
 )
-from coremltools.converters.mil.frontend.torch.utils import TORCH_DTYPE_TO_NUM
 from coremltools.converters.mil.mil import types

@@ -48,8 +49,14 @@ def dequantize_affine(context, node):
     zero_point = (
         inputs[3].val if inputs[3] is not None and inputs[3].val is not None else None
     )
-    # TODO: I'm not sure we need to worry about this b/c input gets cast to int4/int8
+    # I do not think we need to worry about input_dtype b/c it gets cast to int4/int8
+    # For now, we just check that it is int8 or int32
     input_dtype = inputs[4].val  # noqa: F841
+    assert NUM_TO_TORCH_DTYPE[input_dtype] in [
+        _torch.int8,
+        _torch.int32,
+    ], "input_dtype should be int8 or int32"
+
     quant_min = inputs[5].val
     quant_max = inputs[6].val

@@ -67,17 +74,15 @@ def dequantize_affine(context, node):
     if zero_point is not None:
         zero_point = zero_point.reshape(-1, scales_per_row)

-    # TODO: I don't know if CoreML can make use of this. I guess we could add a cast op
-    # to the output, but I'm pretty sure CoreML removes casts during one of its passes
+    # TODO: I don't know if CoreML can make use of this
+    # We could add a cast op to the output, but I'm pretty sure CoreML will remove this during a later pass
+    # For now, we just log a warning
     out_np_dtype = None
     if len(inputs) > 7:
-        output_dtype = inputs[7].val
-        assert isinstance(
-            output_dtype, _torch.dtype
-        ), f"output_dtype must be a torch.dtype, but got type {type(output_dtype)}"
-        out_np_dtype = NUM_TO_NUMPY_DTYPE[  # noqa: F841
-            TORCH_DTYPE_TO_NUM[output_dtype]
-        ]
+        out_np_dtype = NUM_TO_NUMPY_DTYPE[inputs[7].val]
+        logger.warning(
+            f"Core ML ignores output_dtype {out_np_dtype} on torchao.dequantize_affine and instead uses the native precision."
+        )

     if quant_min == -8 and quant_max == 7:
         quantized_np_dtype = types.nptype_from_builtin(types.string_to_builtin("int4"))
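
The net effect of these two hunks: the handler now validates the serialized input dtype code via NUM_TO_TORCH_DTYPE, and it downgrades the old output_dtype assertion to a warning. A minimal standalone sketch of that guard-and-warn pattern (the numeric dtype codes and the plain logging setup below are illustrative assumptions, not the coremltools internals):

import logging

import torch as _torch

logger = logging.getLogger(__name__)

# Illustrative subset of the code -> dtype mapping the real handler imports
# from coremltools as NUM_TO_TORCH_DTYPE; the codes below follow torch's
# ScalarType numbering but are an assumption here.
NUM_TO_TORCH_DTYPE = {1: _torch.int8, 3: _torch.int32, 4: _torch.int64}


def check_dequantize_affine_dtypes(input_dtype_code, out_np_dtype=None):
    # The input must decode to int8 or int32, as the new assert requires
    # (.get avoids a KeyError for unknown codes; None also fails the assert).
    assert NUM_TO_TORCH_DTYPE.get(input_dtype_code) in [
        _torch.int8,
        _torch.int32,
    ], "input_dtype should be int8 or int32"
    if out_np_dtype is not None:
        # output_dtype is ignored rather than enforced: warn, don't fail.
        logger.warning(
            f"Core ML ignores output_dtype {out_np_dtype} on "
            "torchao.dequantize_affine and instead uses the native precision."
        )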

backends/apple/coreml/test/test_torch_ops.py

Lines changed: 33 additions & 12 deletions
@@ -2,7 +2,6 @@
 #
 # Please refer to the license found in the LICENSE file in the root directory of the source tree.

-import copy
 import sys
 import unittest

@@ -15,7 +14,7 @@
 from executorch.backends.apple.coreml.compiler import CoreMLBackend
 from executorch.backends.apple.coreml.partition import CoreMLPartitioner
 from executorch.runtime import Runtime
-from torchao.quantization import quantize_, PerGroup, PerAxis, IntxWeightOnlyConfig
+from torchao.quantization import IntxWeightOnlyConfig, PerAxis, PerGroup, quantize_

 _TEST_RUNTIME = sys.platform == "darwin"

@@ -30,10 +29,12 @@ def _coreml_partitioner(self):
         return CoreMLPartitioner(compile_specs=compile_specs)

     def _get_test_model(self):
-        model = torch.nn.Sequential(torch.nn.Embedding(64, 128), torch.nn.Linear(128, 128), torch.nn.ReLU())
+        model = torch.nn.Sequential(
+            torch.nn.Embedding(64, 128), torch.nn.Linear(128, 128), torch.nn.ReLU()
+        )
         example_inputs = (torch.LongTensor([0]),)
         return model, example_inputs
-
+
     def _compare_outputs(self, executorch_program, eager_program, example_inputs):
         if not _TEST_RUNTIME:
             return
@@ -45,10 +46,14 @@ def _compare_outputs(self, executorch_program, eager_program, example_inputs):
         self.assertTrue(
             torch.allclose(et_outputs, eager_outputs, atol=1e-02, rtol=1e-02)
         )
-
+
     def test_dequantize_affine_b4w_embedding(self):
         model, example_inputs = self._get_test_model()
-        quantize_(model, IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerGroup(32)), lambda m, fqn: isinstance(m, torch.nn.Embedding))
+        quantize_(
+            model,
+            IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerGroup(32)),
+            lambda m, fqn: isinstance(m, torch.nn.Embedding),
+        )
         ep = torch.export.export(model, example_inputs)
         delegated_program = executorch.exir.to_edge_transform_and_lower(
             ep,
@@ -65,7 +70,10 @@ def test_dequantize_affine_b4w_embedding(self):
 
     def test_dequantize_affine_b4w_linear(self):
         model, example_inputs = self._get_test_model()
-        quantize_(model, IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerGroup(32)))
+        quantize_(
+            model,
+            IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerGroup(32)),
+        )
         ep = torch.export.export(model, example_inputs)
         delegated_program = executorch.exir.to_edge_transform_and_lower(
             ep,
@@ -82,7 +90,11 @@ def test_dequantize_affine_b4w_linear(self):
 
     def test_dequantize_affine_c4w_embedding(self):
         model, example_inputs = self._get_test_model()
-        quantize_(model, IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerAxis(0)), lambda m, fqn: isinstance(m, torch.nn.Embedding))
+        quantize_(
+            model,
+            IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerAxis(0)),
+            lambda m, fqn: isinstance(m, torch.nn.Embedding),
+        )
         ep = torch.export.export(model, example_inputs)
         delegated_program = executorch.exir.to_edge_transform_and_lower(
             ep,
@@ -99,7 +111,9 @@ def test_dequantize_affine_c4w_embedding(self):
 
     def test_dequantize_affine_c4w_linear(self):
         model, example_inputs = self._get_test_model()
-        quantize_(model, IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerAxis(0)))
+        quantize_(
+            model, IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerAxis(0))
+        )
         ep = torch.export.export(model, example_inputs)
         delegated_program = executorch.exir.to_edge_transform_and_lower(
             ep,
@@ -113,11 +127,18 @@ def test_dequantize_affine_c4w_linear(self):
         ], f"Got unexpected node target after delegation: {node.target.__name__}"
         et_prog = delegated_program.to_executorch()
         self._compare_outputs(et_prog, model, example_inputs)
-
+
     def test_dequantize_affine_c8w_embedding_b4w_linear(self):
         model, example_inputs = self._get_test_model()
-        quantize_(model, IntxWeightOnlyConfig(weight_dtype=torch.int8, granularity=PerAxis(0)), lambda m, fqn: isinstance(m, torch.nn.Embedding))
-        quantize_(model, IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerGroup(32)))
+        quantize_(
+            model,
+            IntxWeightOnlyConfig(weight_dtype=torch.int8, granularity=PerAxis(0)),
+            lambda m, fqn: isinstance(m, torch.nn.Embedding),
+        )
+        quantize_(
+            model,
+            IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerGroup(32)),
+        )
         ep = torch.export.export(model, example_inputs)
         delegated_program = executorch.exir.to_edge_transform_and_lower(
             ep,
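
Taken together, these tests exercise the full torchao → ExecuTorch → Core ML path. A condensed sketch of that flow, assuming torchao and an ExecuTorch build with the Core ML backend are installed (the partitioner here is constructed without the compile specs the tests pass in):

import executorch.exir
import torch
from executorch.backends.apple.coreml.partition import CoreMLPartitioner
from torchao.quantization import IntxWeightOnlyConfig, PerGroup, quantize_

# Same toy model as the tests: embedding -> linear -> relu.
model = torch.nn.Sequential(
    torch.nn.Embedding(64, 128), torch.nn.Linear(128, 128), torch.nn.ReLU()
)
example_inputs = (torch.LongTensor([0]),)

# 4-bit weight-only quantization with group size 32; quantize_'s default
# filter applies this to the linear layers only.
quantize_(
    model, IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerGroup(32))
)

# Export and lower; the resulting dequantize_affine nodes are delegated
# to Core ML via the handler patched in this commit.
ep = torch.export.export(model, example_inputs)
delegated_program = executorch.exir.to_edge_transform_and_lower(
    ep, partitioner=[CoreMLPartitioner()]
)
et_prog = delegated_program.to_executorch()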
