From 8fa5a9f4078b8abad786d912775d5041a8ef85e5 Mon Sep 17 00:00:00 2001 From: Sakari Ikonen Date: Fri, 24 Oct 2025 02:35:52 +0300 Subject: [PATCH 1/6] implementation comment --- metaflow/plugins/pypi/pip.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/metaflow/plugins/pypi/pip.py b/metaflow/plugins/pypi/pip.py index a9c74ca6c3f..25e9cd734e7 100644 --- a/metaflow/plugins/pypi/pip.py +++ b/metaflow/plugins/pypi/pip.py @@ -119,7 +119,13 @@ def solve(self, id_, packages, python, platform): else: cmd.append(f"{package}=={version}") try: + if "torch" in packages.keys(): + print(cmd) self._call(prefix, cmd) + # We are interested in the outputs 'requires_dist', and whether it contains platform_system == X or platform_machine == Y + # where X or Y are different than the environment performing the resolving. + # In this case we want to do a second pass, which will try to add these packages _without_ the environment markers + # in order to assure that all relevant packages are present in the target environment except PipPackageNotFound as ex: # pretty print package errors raise PipException( From 324abcd5572238ecbc6c14bbd862975625055438 Mon Sep 17 00:00:00 2001 From: Sakari Ikonen Date: Fri, 24 Oct 2025 13:55:28 +0300 Subject: [PATCH 2/6] add environment marker checks for pip solve. 
add recursive passes to solving environment in case dependencies were added --- metaflow/plugins/pypi/pip.py | 83 +++++++++++++++++++++++++++++++--- metaflow/plugins/pypi/utils.py | 9 ++++ 2 files changed, 85 insertions(+), 7 deletions(-) diff --git a/metaflow/plugins/pypi/pip.py b/metaflow/plugins/pypi/pip.py index 25e9cd734e7..3837809346e 100644 --- a/metaflow/plugins/pypi/pip.py +++ b/metaflow/plugins/pypi/pip.py @@ -12,7 +12,8 @@ from metaflow.exception import MetaflowException from .micromamba import Micromamba -from .utils import pip_tags, wheel_tags +from .utils import pip_tags, wheel_tags, conda_platform, markers_from_platform +import platform class PipException(MetaflowException): @@ -119,13 +120,7 @@ def solve(self, id_, packages, python, platform): else: cmd.append(f"{package}=={version}") try: - if "torch" in packages.keys(): - print(cmd) self._call(prefix, cmd) - # We are interested in the outputs 'requires_dist', and whether it contains platform_system == X or platform_machine == Y - # where X or Y are different than the environment performing the resolving. - # In this case we want to do a second pass, which will try to add these packages _without_ the environment markers - # in order to assure that all relevant packages are present in the target environment except PipPackageNotFound as ex: # pretty print package errors raise PipException( @@ -156,6 +151,80 @@ def _format(dl_info): res["hash"] = vcs_info["commit_id"] return res + def _extract_platform_specific_deps( + pkg_metadata, target_system, target_machine + ): + # We are interested in the outputs 'metadata.requires_dist', and whether it contains platform_system or platform_machine markers + # that are different than the environment performing the resolving. + # In this case we want to do a second pass, which will try to add these packages _without_ the environment markers + # in order to assure that all relevant packages are present in the target environment + # e.g. 
+ # "nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + deps = pkg_metadata.get("requires_dist") + deps_with_markers = {} + if deps is None: + return deps_with_markers + for dep in deps: + pkg, *markers = dep.split(";", 1) + if not markers: + continue + match_system = re.match( + r"^.*platform_system == (.*?)\s", markers[0] + ) + plat_system = ( + match_system.groups()[0].strip('"') if match_system else None + ) + + match_machine = re.match( + r"^.*platform_machine == (.*?)\s", markers[0] + ) + plat_machine = ( + match_machine.groups()[0].strip('"') if match_machine else None + ) + + if plat_system is None and plat_machine is None: + continue + + if plat_system == target_system or plat_machine == target_machine: + # we must make sure that this dependency gets added to the list, + # as it will not be carried by the default resolve due to platform/machine mismatch. + # TODO: keep the original constraint, as they might not be strict version pins. + p, v = pkg.split("==") + deps_with_markers[p] = v + + return deps_with_markers + + # NOTE: Make sure to run this only if current platform and target platform are a mismatch. + # i.e. we are doing a cross-platform resolve! + if conda_platform() != platform: + debug.conda_exec( + "Current platform differs from target platform. Performing a check for environment markers in package dependencies that might end up being not included otherwise." 
+ ) + requested_sys, requested_machine = markers_from_platform(platform) + debug.conda_exec( + f"Checking for environment markers 'platform_system == {requested_sys}' and 'platform_machine == {requested_machine}'" + ) + with open(report, mode="r", encoding="utf-8") as f: + deps_to_add = { + k: v + for item in json.load(f)["install"] + for k, v in _extract_platform_specific_deps( + item.get("metadata", {}), requested_sys, requested_machine + ).items() + } + + added_deps = False + for dep, ver in deps_to_add.items(): + if dep not in packages: + added_deps = True + packages[dep] = ver + + if added_deps: + debug.conda_exec( + "Added dependencies due to environment markers, have to re-solve the environment with new ones." + ) + return self.solve(id_, packages, python, platform) + with open(report, mode="r", encoding="utf-8") as f: return [ _format(item["download_info"]) for item in json.load(f)["install"] diff --git a/metaflow/plugins/pypi/utils.py b/metaflow/plugins/pypi/utils.py index 8e3914f588d..1af46e20f68 100644 --- a/metaflow/plugins/pypi/utils.py +++ b/metaflow/plugins/pypi/utils.py @@ -41,6 +41,15 @@ def conda_platform(): return "osx-64" +def markers_from_platform(platform): + plat, mach = platform.split("-") + + platform_system = {"osx": "Darwin", "linux": "Linux"}[plat] + platform_machine = {"32": "x86", "64": "x86_64", "arm64": "aarch64"}[mach] + + return platform_system, platform_machine + + def wheel_tags(wheel): _, _, _, tags = parse_wheel_filename(wheel) return list(tags) From 76e03c6db67ef5e6dd63a1d2d7a9b6cb476ea27e Mon Sep 17 00:00:00 2001 From: Sakari Ikonen Date: Fri, 24 Oct 2025 16:54:14 +0300 Subject: [PATCH 3/6] limit solve recursion depth --- metaflow/plugins/pypi/pip.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/metaflow/plugins/pypi/pip.py b/metaflow/plugins/pypi/pip.py index 3837809346e..7807093d2a2 100644 --- a/metaflow/plugins/pypi/pip.py +++ b/metaflow/plugins/pypi/pip.py @@ -45,6 +45,7 @@ def __init__(self, 
error): METADATA_FILE = "{prefix}/.pip/metadata" INSTALLATION_MARKER = "{prefix}/.pip/id" +MAX_SOLVE_ITERATIONS = 1 # TODO: # 1. Support local dirs, non-wheel like packages @@ -70,7 +71,7 @@ def _get_resolved_python_version(self, prefix): except Exception: return None - def solve(self, id_, packages, python, platform): + def solve(self, id_, packages, python, platform, iterations=0): prefix = self.micromamba.path_to_environment(id_) if prefix is None: msg = "Unable to locate a Micromamba managed virtual environment\n" @@ -196,7 +197,7 @@ def _extract_platform_specific_deps( # NOTE: Make sure to run this only if current platform and target platform are a mismatch. # i.e. we are doing a cross-platform resolve! - if conda_platform() != platform: + if (conda_platform() != platform) and iterations < MAX_SOLVE_ITERATIONS: debug.conda_exec( "Current platform differs from target platform. Performing a check for environment markers in package dependencies that might end up being not included otherwise." ) @@ -223,7 +224,9 @@ def _extract_platform_specific_deps( debug.conda_exec( "Added dependencies due to environment markers, have to re-solve the environment with new ones." 
 ) - return self.solve(id_, packages, python, platform) + return self.solve( + id_, packages, python, platform, iterations=iterations + 1 + ) with open(report, mode="r", encoding="utf-8") as f: return [ From 90ed708a9561a74825d4d401c060f7638a836a75 Mon Sep 17 00:00:00 2001 From: Sakari Ikonen Date: Sat, 25 Oct 2025 01:49:33 +0300 Subject: [PATCH 4/6] use Requirement class for easier req parsing in order to retain correct specifiers --- metaflow/plugins/pypi/pip.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/metaflow/plugins/pypi/pip.py b/metaflow/plugins/pypi/pip.py index 7807093d2a2..32694e09303 100644 --- a/metaflow/plugins/pypi/pip.py +++ b/metaflow/plugins/pypi/pip.py @@ -13,7 +13,7 @@ from .micromamba import Micromamba from .utils import pip_tags, wheel_tags, conda_platform, markers_from_platform -import platform +from packaging.requirements import Requirement class PipException(MetaflowException): @@ -166,18 +166,18 @@ def _extract_platform_specific_deps( if deps is None: return deps_with_markers for dep in deps: - pkg, *markers = dep.split(";", 1) - if not markers: + req = Requirement(dep) + if not req.marker: continue match_system = re.match( - r"^.*platform_system == (.*?)\s", markers[0] + r"^.*platform_system == (.*?)\s", str(req.marker) ) plat_system = ( match_system.groups()[0].strip('"') if match_system else None ) match_machine = re.match( - r"^.*platform_machine == (.*?)\s", markers[0] + r"^.*platform_machine == (.*?)\s", str(req.marker) ) plat_machine = ( match_machine.groups()[0].strip('"') if match_machine else None @@ -189,9 +189,10 @@ def _extract_platform_specific_deps( if plat_system == target_system or plat_machine == target_machine: # we must make sure that this dependency gets added to the list, # as it will not be carried by the default resolve due to platform/machine mismatch. - # TODO: keep the original constraint, as they might not be strict version pins. 
- p, v = pkg.split("==") - deps_with_markers[p] = v + version = str(req.specifier) + if version.startswith("=="): + version = version[2:] + deps_with_markers[req.name] = version return deps_with_markers From c2a24dfd78b441b4c42219d48e0c0c8d5d0161d0 Mon Sep 17 00:00:00 2001 From: Sakari Ikonen Date: Sat, 25 Oct 2025 02:23:10 +0300 Subject: [PATCH 5/6] scope the import for packaging --- metaflow/plugins/pypi/pip.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metaflow/plugins/pypi/pip.py b/metaflow/plugins/pypi/pip.py index 32694e09303..f2e6a397ce0 100644 --- a/metaflow/plugins/pypi/pip.py +++ b/metaflow/plugins/pypi/pip.py @@ -13,7 +13,6 @@ from .micromamba import Micromamba from .utils import pip_tags, wheel_tags, conda_platform, markers_from_platform -from packaging.requirements import Requirement class PipException(MetaflowException): @@ -152,6 +151,8 @@ def _format(dl_info): res["hash"] = vcs_info["commit_id"] return res + from packaging.requirements import Requirement + def _extract_platform_specific_deps( pkg_metadata, target_system, target_machine ): From 05dad03a728f01f6a015c801bbc9624a39b376b7 Mon Sep 17 00:00:00 2001 From: Sakari Ikonen Date: Thu, 30 Oct 2025 15:44:48 +0200 Subject: [PATCH 6/6] use vendored Requirement class --- metaflow/plugins/pypi/pip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metaflow/plugins/pypi/pip.py b/metaflow/plugins/pypi/pip.py index f2e6a397ce0..e15a5f2f30c 100644 --- a/metaflow/plugins/pypi/pip.py +++ b/metaflow/plugins/pypi/pip.py @@ -151,7 +151,7 @@ def _format(dl_info): res["hash"] = vcs_info["commit_id"] return res - from packaging.requirements import Requirement + from metaflow._vendor.packaging.requirements import Requirement def _extract_platform_specific_deps( pkg_metadata, target_system, target_machine