Skip to content

Commit b8ee1f5

Browse files
authored
Use dedicated tags for most common instances (#19762)
This uses dedicated secondary type tags for the 5 most common instances. I also move the instance cache from `checker.py` to `types.py` so it is easier to share. The latter, however, requires a couple of tweaks to avoid breaking builtins fixtures in tests (see changes in `build.py` and `checkexpr.py`). This makes the cache another ~20% smaller (so that with this PR the fixed-format (FF) cache is 4.5x smaller than JSON), and it also makes `mypy -c 'import torch'` almost 10% faster with a warm cache (when one uses `--fixed-format-cache`, obviously). I don't see any visible effect on cold cache runs.
1 parent 23965ab commit b8ee1f5

File tree

4 files changed

+99
-23
lines changed

4 files changed

+99
-23
lines changed

mypy/build.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@
9191
from mypy.renaming import LimitedVariableRenameVisitor, VariableRenameVisitor
9292
from mypy.stats import dump_type_stats
9393
from mypy.stubinfo import is_module_from_legacy_bundled_package, stub_distribution_name
94-
from mypy.types import Type
94+
from mypy.types import Type, instance_cache
9595
from mypy.typestate import reset_global_state, type_state
9696
from mypy.util import json_dumps, json_loads
9797
from mypy.version import __version__
@@ -180,6 +180,9 @@ def build(
180180
# fields for callers that want the traditional API.
181181
messages = []
182182

183+
# This is mostly for the benefit of tests that use builtins fixtures.
184+
instance_cache.reset()
185+
183186
def default_flush_errors(
184187
filename: str | None, new_messages: list[str], is_serious: bool
185188
) -> None:

mypy/checker.py

Lines changed: 16 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@
229229
flatten_nested_unions,
230230
get_proper_type,
231231
get_proper_types,
232+
instance_cache,
232233
is_literal_type,
233234
is_named_instance,
234235
)
@@ -467,12 +468,6 @@ def __init__(
467468
self, self.msg, self.plugin, per_line_checking_time_ns
468469
)
469470

470-
self._str_type: Instance | None = None
471-
self._function_type: Instance | None = None
472-
self._int_type: Instance | None = None
473-
self._bool_type: Instance | None = None
474-
self._object_type: Instance | None = None
475-
476471
self.pattern_checker = PatternChecker(self, self.msg, self.plugin, options)
477472
self._unique_id = 0
478473

@@ -7460,25 +7455,25 @@ def named_type(self, name: str) -> Instance:
74607455
For example, named_type('builtins.object') produces the 'object' type.
74617456
"""
74627457
if name == "builtins.str":
7463-
if self._str_type is None:
7464-
self._str_type = self._named_type(name)
7465-
return self._str_type
7458+
if instance_cache.str_type is None:
7459+
instance_cache.str_type = self._named_type(name)
7460+
return instance_cache.str_type
74667461
if name == "builtins.function":
7467-
if self._function_type is None:
7468-
self._function_type = self._named_type(name)
7469-
return self._function_type
7462+
if instance_cache.function_type is None:
7463+
instance_cache.function_type = self._named_type(name)
7464+
return instance_cache.function_type
74707465
if name == "builtins.int":
7471-
if self._int_type is None:
7472-
self._int_type = self._named_type(name)
7473-
return self._int_type
7466+
if instance_cache.int_type is None:
7467+
instance_cache.int_type = self._named_type(name)
7468+
return instance_cache.int_type
74747469
if name == "builtins.bool":
7475-
if self._bool_type is None:
7476-
self._bool_type = self._named_type(name)
7477-
return self._bool_type
7470+
if instance_cache.bool_type is None:
7471+
instance_cache.bool_type = self._named_type(name)
7472+
return instance_cache.bool_type
74787473
if name == "builtins.object":
7479-
if self._object_type is None:
7480-
self._object_type = self._named_type(name)
7481-
return self._object_type
7474+
if instance_cache.object_type is None:
7475+
instance_cache.object_type = self._named_type(name)
7476+
return instance_cache.object_type
74827477
return self._named_type(name)
74837478

74847479
def _named_type(self, name: str) -> Instance:

mypy/checkexpr.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -494,7 +494,8 @@ def module_type(self, node: MypyFile) -> Instance:
494494
# In test cases 'types' may not be available.
495495
# Fall back to a dummy 'object' type instead to
496496
# avoid a crash.
497-
result = self.named_type("builtins.object")
497+
# Make a copy so that we don't set extra_attrs (below) on a shared instance.
498+
result = self.named_type("builtins.object").copy_modified()
498499
module_attrs: dict[str, Type] = {}
499500
immutable = set()
500501
for name, n in node.names.items():

mypy/types.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1708,6 +1708,23 @@ def deserialize(cls, data: JsonDict | str) -> Instance:
17081708

17091709
def write(self, data: Buffer) -> None:
17101710
write_tag(data, INSTANCE)
1711+
if not self.args and not self.last_known_value and not self.extra_attrs:
1712+
type_ref = self.type.fullname
1713+
if type_ref == "builtins.str":
1714+
write_tag(data, INSTANCE_STR)
1715+
elif type_ref == "builtins.function":
1716+
write_tag(data, INSTANCE_FUNCTION)
1717+
elif type_ref == "builtins.int":
1718+
write_tag(data, INSTANCE_INT)
1719+
elif type_ref == "builtins.bool":
1720+
write_tag(data, INSTANCE_BOOL)
1721+
elif type_ref == "builtins.object":
1722+
write_tag(data, INSTANCE_OBJECT)
1723+
else:
1724+
write_tag(data, INSTANCE_SIMPLE)
1725+
write_str(data, type_ref)
1726+
return
1727+
write_tag(data, INSTANCE_GENERIC)
17111728
write_str(data, self.type.fullname)
17121729
write_type_list(data, self.args)
17131730
write_type_opt(data, self.last_known_value)
@@ -1719,6 +1736,39 @@ def write(self, data: Buffer) -> None:
17191736

17201737
@classmethod
17211738
def read(cls, data: Buffer) -> Instance:
1739+
tag = read_tag(data)
1740+
# This is quite verbose, but this is very hot code, so we are not
1741+
# using dictionary lookups here.
1742+
if tag == INSTANCE_STR:
1743+
if instance_cache.str_type is None:
1744+
instance_cache.str_type = Instance(NOT_READY, [])
1745+
instance_cache.str_type.type_ref = "builtins.str"
1746+
return instance_cache.str_type
1747+
if tag == INSTANCE_FUNCTION:
1748+
if instance_cache.function_type is None:
1749+
instance_cache.function_type = Instance(NOT_READY, [])
1750+
instance_cache.function_type.type_ref = "builtins.function"
1751+
return instance_cache.function_type
1752+
if tag == INSTANCE_INT:
1753+
if instance_cache.int_type is None:
1754+
instance_cache.int_type = Instance(NOT_READY, [])
1755+
instance_cache.int_type.type_ref = "builtins.int"
1756+
return instance_cache.int_type
1757+
if tag == INSTANCE_BOOL:
1758+
if instance_cache.bool_type is None:
1759+
instance_cache.bool_type = Instance(NOT_READY, [])
1760+
instance_cache.bool_type.type_ref = "builtins.bool"
1761+
return instance_cache.bool_type
1762+
if tag == INSTANCE_OBJECT:
1763+
if instance_cache.object_type is None:
1764+
instance_cache.object_type = Instance(NOT_READY, [])
1765+
instance_cache.object_type.type_ref = "builtins.object"
1766+
return instance_cache.object_type
1767+
if tag == INSTANCE_SIMPLE:
1768+
inst = Instance(NOT_READY, [])
1769+
inst.type_ref = read_str(data)
1770+
return inst
1771+
assert tag == INSTANCE_GENERIC
17221772
type_ref = read_str(data)
17231773
inst = Instance(NOT_READY, read_type_list(data))
17241774
inst.type_ref = type_ref
@@ -1769,6 +1819,25 @@ def is_singleton_type(self) -> bool:
17691819
)
17701820

17711821

1822+
class InstanceCache:
1823+
def __init__(self) -> None:
1824+
self.str_type: Instance | None = None
1825+
self.function_type: Instance | None = None
1826+
self.int_type: Instance | None = None
1827+
self.bool_type: Instance | None = None
1828+
self.object_type: Instance | None = None
1829+
1830+
def reset(self) -> None:
1831+
self.str_type = None
1832+
self.function_type = None
1833+
self.int_type = None
1834+
self.bool_type = None
1835+
self.object_type = None
1836+
1837+
1838+
instance_cache: Final = InstanceCache()
1839+
1840+
17721841
class FunctionLike(ProperType):
17731842
"""Abstract base class for function types."""
17741843

@@ -4142,6 +4211,14 @@ def type_vars_as_args(type_vars: Sequence[TypeVarLikeType]) -> tuple[Type, ...]:
41424211
TYPE_TYPE: Final[Tag] = 18
41434212
PARAMETERS: Final[Tag] = 19
41444213

4214+
INSTANCE_STR: Final[Tag] = 101
4215+
INSTANCE_FUNCTION: Final[Tag] = 102
4216+
INSTANCE_INT: Final[Tag] = 103
4217+
INSTANCE_BOOL: Final[Tag] = 104
4218+
INSTANCE_OBJECT: Final[Tag] = 105
4219+
INSTANCE_SIMPLE: Final[Tag] = 106
4220+
INSTANCE_GENERIC: Final[Tag] = 107
4221+
41454222

41464223
def read_type(data: Buffer) -> Type:
41474224
tag = read_tag(data)

0 commit comments

Comments
 (0)