Optimize dependency iteration #13489

Open
wants to merge 7 commits into base: main
1 change: 1 addition & 0 deletions news/optimize-dependency-cache.feature.rst
@@ -0,0 +1 @@
Optimize dependency resolution performance by caching parsed dependencies and extras to avoid redundant parsing operations during candidate evaluation. Also add support for discovering distributions from sys.meta_path finders to enable in-memory package installations.
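
The second half of that news entry is easiest to picture with a concrete finder. The sketch below is not part of this diff; the names InMemoryDistribution and InMemoryFinder are invented for illustration. It shows a sys.meta_path finder that exposes a purely in-memory distribution, the kind of thing the discovery code added below can now surface.

# Sketch only: register a finder whose distribution metadata lives in memory.
import sys
import importlib.abc
import importlib.metadata


class InMemoryDistribution(importlib.metadata.Distribution):
    def __init__(self, metadata_text):
        self._metadata_text = metadata_text

    def read_text(self, filename):
        # Only METADATA is backed here; every other file is reported as absent.
        return self._metadata_text if filename == "METADATA" else None

    def locate_file(self, path):
        # Nothing exists on disk, so there is no location to report.
        return None


class InMemoryFinder(importlib.abc.MetaPathFinder):
    def find_spec(self, fullname, path, target=None):
        return None  # contributes metadata only, no importable modules

    def find_distributions(self, context=None):
        yield InMemoryDistribution(
            "Metadata-Version: 2.1\nName: example\nVersion: 1.0\n"
        )


sys.meta_path.append(InMemoryFinder())

With such a finder registered, importlib.metadata.distributions() called without a path argument yields the in-memory distribution, which is exactly the call the new find_meta_path_distributions() method below relies on.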
37 changes: 37 additions & 0 deletions src/pip/_internal/metadata/importlib/_envs.py
@@ -76,6 +76,28 @@ def _find_impl(self, location: str) -> Iterator[FoundResult]:
            self._found_names.add(name)
            yield dist, info_location

    def find_meta_path_distributions(self) -> Iterator[FoundResult]:
        """Find distributions from sys.meta_path finders."""
        try:
            # Get all distributions without specifying a path, which includes
            # distributions from sys.meta_path finders
            for dist in importlib.metadata.distributions():
                info_location = get_info_location(dist)
                try:
                    name = get_dist_canonical_name(dist)
                except BadMetadata as e:
                    logger.warning("Skipping %s due to %s", info_location, e.reason)
                    continue
                # Only yield if we haven't seen this name before
                if name in self._found_names:
                    continue
                self._found_names.add(name)
                yield dist, info_location
        except Exception:
            # If there's any issue with finding meta_path distributions,
            # don't break the entire discovery process - backwards compatibility
            pass

    def find(self, location: str) -> Iterator[BaseDistribution]:
        """Find distributions in a location.

@@ -133,6 +155,21 @@ def _iter_distributions(self) -> Iterator[BaseDistribution]:
            yield from finder.find(location)
            yield from finder.find_legacy_editables(location)

        # Also check for distributions from sys.meta_path finders
        # This is backwards compatible - if no custom finders exist, this does nothing
        yield from self._iter_meta_path_distributions(finder)

    def _iter_meta_path_distributions(
        self, finder: _DistributionFinder
    ) -> Iterator[Distribution]:
        """Yield distributions from sys.meta_path finders."""
        for dist, info_location in finder.find_meta_path_distributions():
            if info_location is None:
                installed_location: BasePath | None = None
            else:
                installed_location = info_location.parent
            yield Distribution(dist, info_location, installed_location)

    def get_distribution(self, name: str) -> BaseDistribution | None:
        canonical_name = canonicalize_name(name)
        matches = (
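A rough usage sketch of the discovery path added above, assuming a finder like the one shown after the news entry has been registered. get_default_environment() is pip's internal entry point for the Environment class patched here; it is used purely for illustration, not as a supported public API.

from pip._internal.metadata import get_default_environment

# The in-memory distribution is now visible to pip's environment lookup.
dist = get_default_environment().get_distribution("example")
if dist is not None:
    print(dist.canonical_name, dist.version)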
52 changes: 42 additions & 10 deletions src/pip/_internal/resolution/resolvelib/candidates.py
@@ -5,7 +5,12 @@
from collections.abc import Iterable
from typing import TYPE_CHECKING, Any, Union, cast

from pip._vendor.packaging.requirements import InvalidRequirement
from pip._vendor.packaging.requirements import (
    InvalidRequirement,
)
from pip._vendor.packaging.requirements import (
    Requirement as PackagingRequirement,
)
from pip._vendor.packaging.utils import NormalizedName, canonicalize_name
from pip._vendor.packaging.version import Version

@@ -159,8 +164,11 @@ def __init__(
        self._ireq = ireq
        self._name = name
        self._version = version
        self.dist = self._prepare()
        self._hash: int | None = None
        # Cache for parsed dependencies to avoid multiple iterations
        self._cached_dependencies: list[PackagingRequirement] | None = None
        self._cached_extras: list[NormalizedName] | None = None
        self.dist = self._prepare()

    def __str__(self) -> str:
        return f"{self.name} {self.version}"
@@ -207,6 +215,22 @@ def format_for_error(self) -> str:
            f"(from {self._link.file_path if self._link.is_file else self._link})"
        )

    def _get_cached_dependencies(self) -> list[PackagingRequirement]:
        """Get cached dependencies, parsing them only once."""
        if self._cached_dependencies is None:
            if self._cached_extras is None:
                self._cached_extras = list(self.dist.iter_provided_extras())
            self._cached_dependencies = list(
                self.dist.iter_dependencies(self._cached_extras)
            )
        return self._cached_dependencies

    def _get_cached_extras(self) -> list[NormalizedName]:
        """Get cached extras, parsing them only once."""
        if self._cached_extras is None:
            self._cached_extras = list(self.dist.iter_provided_extras())
        return self._cached_extras

    def _prepare_distribution(self) -> BaseDistribution:
        raise NotImplementedError("Override in subclass")

@@ -227,10 +251,14 @@ def _check_metadata_consistency(self, dist: BaseDistribution) -> None:
                str(dist.version),
            )
        # check dependencies are valid
        # TODO performance: this means we iterate the dependencies at least twice,
        # we may want to cache parsed Requires-Dist
        # Parse and cache dependencies during validation to avoid re-parsing later
        try:
            list(dist.iter_dependencies(list(dist.iter_provided_extras())))
            if self._cached_extras is None:
                self._cached_extras = list(dist.iter_provided_extras())
            if self._cached_dependencies is None:
                self._cached_dependencies = list(
                    dist.iter_dependencies(self._cached_extras)
                )
        except InvalidRequirement as e:
            raise MetadataInvalid(self._ireq, str(e))

@@ -255,9 +283,11 @@ def iter_dependencies(self, with_requires: bool) -> Iterable[Requirement | None]
        # Emit the Requires-Python requirement first to fail fast on
        # unsupported candidates and avoid pointless downloads/preparation.
        yield self._factory.make_requires_python_requirement(self.dist.requires_python)
        requires = self.dist.iter_dependencies() if with_requires else ()
        for r in requires:
            yield from self._factory.make_requirements_from_spec(str(r), self._ireq)
        if with_requires:
            # Use cached dependencies to avoid re-parsing
            requires = self._get_cached_dependencies()
            for r in requires:
                yield from self._factory.make_requirements_from_spec(str(r), self._ireq)

    def get_install_requirement(self) -> InstallRequirement | None:
        return self._ireq
@@ -515,8 +545,10 @@ def iter_dependencies(self, with_requires: bool) -> Iterable[Requirement | None]

        # The user may have specified extras that the candidate doesn't
        # support. We ignore any unsupported extras here.
        valid_extras = self.extras.intersection(self.base.dist.iter_provided_extras())
        invalid_extras = self.extras.difference(self.base.dist.iter_provided_extras())
        # Cache provided_extras to avoid multiple iterations
        provided_extras = set(self.base.dist.iter_provided_extras())
        valid_extras = self.extras.intersection(provided_extras)
        invalid_extras = self.extras.difference(provided_extras)
        for extra in sorted(invalid_extras):
            logger.warning(
                "%s %s does not provide the extra '%s'",
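A side note on the caching approach in this file: the same parse-once behaviour could also be expressed with functools.cached_property, since the candidate's dist is not expected to change after _prepare(). This is only an illustrative alternative, not what the diff does; the class and attribute names below are invented.

from functools import cached_property


class _CandidateSketch:
    def __init__(self, dist):
        self.dist = dist

    @cached_property
    def _provided_extras(self):
        # Evaluated once on first access, then stored on the instance.
        return list(self.dist.iter_provided_extras())

    @cached_property
    def _dependencies(self):
        return list(self.dist.iter_dependencies(self._provided_extras))

The explicit None-checked attributes used in the diff achieve the same effect while keeping the caching visible at the call sites in _check_metadata_consistency and iter_dependencies.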
5 changes: 4 additions & 1 deletion src/pip/_internal/resolution/resolvelib/factory.py
@@ -281,7 +281,10 @@ def _get_installed_candidate() -> Candidate | None:
            try:
                # Don't use the installed distribution if its version
                # does not fit the current dependency graph.
                if not specifier.contains(installed_dist.version, prereleases=True):
                version_check = specifier.contains(
                    installed_dist.version, prereleases=True
                )
                if not version_check:
Comment on lines -284 to +287 (Contributor):
This needlessly adds to the diff, I would revert this part.

                    return None
            except InvalidVersion as e:
                raise InvalidInstalledPackage(dist=installed_dist, invalid_exc=e)
12 changes: 10 additions & 2 deletions tests/functional/test_install.py
@@ -2582,8 +2582,16 @@ def find_distributions(self, context=None):
        sys.meta_path.append(CustomFinder())
        """
    )
    with open(script.site_packages_path / "sitecustomize.py", "w") as sitecustomize:
        sitecustomize.write(sitecustomize_text)
    # Create a custom module and add to site-packages
    custom_module_path = script.site_packages_path / "setup_custom_finder.py"
    with open(custom_module_path, "w") as f:
        f.write(sitecustomize_text)

    # Create a .pth file that imports our module
    pth_content = "import setup_custom_finder"
    pth_file = script.site_packages_path / "setup_custom_finder.pth"
    with open(pth_file, "w") as f:
        f.write(pth_content)

    result = script.pip("install", "example")

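The diff does not spell out why the test switches from sitecustomize.py to a .pth file. My reading is that it relies on the site module executing lines that begin with "import" in *.pth files whenever a site directory is processed, so the finder gets registered in the pip subprocess the test spawns. A standalone sketch of that mechanism, with placeholder paths:

import site
from pathlib import Path

demo_dir = Path("demo-site-packages")
demo_dir.mkdir(exist_ok=True)
(demo_dir / "setup_custom_finder.py").write_text("print('custom finder registered')\n")
(demo_dir / "setup_custom_finder.pth").write_text("import setup_custom_finder\n")

# addsitedir() processes the .pth file and runs its import line immediately;
# a fresh interpreter does the same for real site-packages directories at startup.
site.addsitedir(str(demo_dir))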