Change the setup lambda to take no params (#641)

jayasi · facebook-github-bot · commit f48544952630 · 2025-07-25T14:19:18.000-07:00
Summary: Pull Request resolved: #641 To keep the public API consistent, MonarchContext is no longer exposed: the setup callable passed to the proc mesh now takes no parameters. Users can call current_rank().rank inside their setup method to get the current rank. Reviewed By: suo Differential Revision: D78929218 fbshipit-source-id: df2c5c19a8efb9b7d6e69b902a01edda57c0ae7a
diff --git a/python/monarch/_src/actor/proc_mesh.py b/python/monarch/_src/actor/proc_mesh.py
@@ -43,7 +43,6 @@
     Actor,
     ActorMeshRef,
     fake_sync_state,
-    MonarchContext,
 )
 
 from monarch._src.actor.allocator import LocalAllocator, ProcessAllocator, SimAllocator
@@ -89,7 +88,7 @@ class SetupActor(Actor):
     Typically used to setup the environment variables.
     """
 
-    def __init__(self, env: Callable[[MonarchContext], None]) -> None:
+    def __init__(self, env: Callable[[], None]) -> None:
         """
         Initialize the setup actor with the user defined setup method.
         """
@@ -100,8 +99,7 @@ async def setup(self) -> None:
         """
         Call the user defined setup method with the monarch context.
         """
-        ctx = MonarchContext.get()
-        self._setup_method(ctx)
+        self._setup_method()
 
 
 T = TypeVar("T")
@@ -114,7 +112,7 @@ async def setup(self) -> None:
 
 
 async def _allocate_nonblocking(
-    alloc: Alloc, setup: Callable[[MonarchContext], None] | None = None
+    alloc: Alloc, setup: Callable[[], None] | None = None
 ) -> "ProcMesh":
     _proc_mesh = await HyProcMesh.allocate_nonblocking(alloc)
     if setup is None:
@@ -211,15 +209,25 @@ async def monitor_loop(monitor):
 
     @classmethod
     def from_alloc(
-        self, alloc: Alloc, setup: Callable[[MonarchContext], None] | None = None
+        self, alloc: Alloc, setup: Callable[[], None] | None = None
     ) -> Future["ProcMesh"]:
         """
         Allocate a process mesh according to the provided alloc.
         Returns when the mesh is fully allocated.
 
         Arguments:
         - `alloc`: The alloc to allocate according to.
-        - `setup`: A lambda taking MonarchContext as param, can be used to setup env vars on the allocated mesh
+        - `setup`: An optional callable taking no arguments, used to configure environment variables on the allocated mesh.
+        Call `current_rank()` inside the callable to obtain the current rank.
+
+        Example of a setup method to initialize torch distributed environment variables:
+        ```
+        def setup():
+            rank = current_rank()
+            os.environ["RANK"] = str(rank.rank)
+            os.environ["WORLD_SIZE"] = str(len(rank.shape))
+            os.environ["LOCAL_RANK"] = str(rank["gpus"])
+        ```
         """
         return Future(
             impl=lambda: _allocate_nonblocking(alloc, setup),
@@ -428,7 +436,7 @@ async def proc_mesh_nonblocking(
     gpus: Optional[int] = None,
     hosts: int = 1,
     env: dict[str, str] | None = None,
-    setup: Callable[[MonarchContext], None] | None = None,
+    setup: Callable[[], None] | None = None,
 ) -> ProcMesh:
     if gpus is None:
         gpus = _local_device_count()
@@ -457,7 +465,7 @@ def proc_mesh(
     gpus: Optional[int] = None,
     hosts: int = 1,
     env: dict[str, str] | None = None,
-    setup: Callable[[MonarchContext], None] | None = None,
+    setup: Callable[[], None] | None = None,
 ) -> Future[ProcMesh]:
     return Future(
         impl=lambda: proc_mesh_nonblocking(
diff --git a/python/tests/test_allocator.py b/python/tests/test_allocator.py
@@ -33,7 +33,6 @@
     ChannelTransport,
 )
 
-from monarch._src.actor.actor_mesh import MonarchContext
 from monarch._src.actor.allocator import (
     ALLOC_LABEL_PROC_MESH_NAME,
     LocalAllocator,
@@ -160,7 +159,7 @@ async def test_setup_lambda_with_multiple_env_vars(self) -> None:
             "TEST_ENV_VAR_3": "value_3",
         }
 
-        def setup_multiple_env_vars(ctx: MonarchContext) -> None:
+        def setup_multiple_env_vars() -> None:
             for name, value in env_vars.items():
                 os.environ[name] = value
 
@@ -184,36 +183,33 @@ def setup_multiple_env_vars(ctx: MonarchContext) -> None:
             await proc_mesh.stop()
 
     async def test_setup_lambda_with_context_info(self) -> None:
-        """Test that the setup lambda can access context information"""
-        context_var_name: str = "PROC_MESH_CONTEXT_INFO"
+        """Test that the setup lambda can access rank information"""
+        context_var_name: str = "PROC_MESH_RANK_INFO"
 
-        def setup_with_context(ctx: MonarchContext) -> None:
-            context_info = f"proc_id:{ctx.proc_id},point_rank:{ctx.point.rank}"
+        def setup_with_rank() -> None:
+            context_info = f"point_rank:{current_rank().rank}"
             os.environ[context_var_name] = context_info
 
         spec = AllocSpec(AllocConstraints(), gpus=1, hosts=1)
         allocator = LocalAllocator()
         alloc = await allocator.allocate(spec)
 
-        proc_mesh = await ProcMesh.from_alloc(alloc, setup=setup_with_context)
+        proc_mesh = await ProcMesh.from_alloc(alloc, setup=setup_with_rank)
 
         try:
             actor = await proc_mesh.spawn("env_check", EnvCheckActor)
 
-            context_info = await actor.get_env_var.call_one(context_var_name)
+            rank_info = await actor.get_env_var.call_one(context_var_name)
 
             self.assertNotEqual(
-                context_info,
+                rank_info,
                 "NOT_SET",
                 "Context information was not stored in the environment variable",
             )
-            self.assertIn(
-                "proc_id:", context_info, "Context information does not contain proc_id"
-            )
             self.assertIn(
                 "point_rank:0",
-                context_info,
-                f"Context information {context_info} does not contain point_rank",
+                rank_info,
+                f"Context information {rank_info} does not contain point_rank",
             )
         finally:
             await proc_mesh.stop()
@@ -435,7 +431,7 @@ async def test_setup_lambda_sets_env_vars(self) -> None:
         test_var_name: str = "TEST_ENV_VAR_FOR_PROC_MESH"
         test_var_value: str = "test_value_123"
 
-        def setup_env_vars(ctx: MonarchContext) -> None:
+        def setup_env_vars() -> None:
             os.environ[test_var_name] = test_var_value
 
         hosts = 2
diff --git a/python/tests/test_env_before_cuda.py b/python/tests/test_env_before_cuda.py
@@ -15,7 +15,6 @@
 
 import torch
 from monarch._rust_bindings.monarch_hyperactor.alloc import AllocConstraints, AllocSpec
-from monarch._src.actor.actor_mesh import MonarchContext
 from monarch._src.actor.allocator import LocalAllocator
 from monarch._src.actor.proc_mesh import proc_mesh
 from monarch.actor import Actor, endpoint, ProcMesh
@@ -70,7 +69,7 @@ async def test_lambda_sets_env_vars_before_cuda_init(self) -> None:
             "CUDA_LAUNCH_BLOCKING": "1",
         }
 
-        def setup_cuda_env(_: MonarchContext) -> None:
+        def setup_cuda_env() -> None:
             for name, value in cuda_env_vars.items():
                 os.environ[name] = value
 
@@ -107,7 +106,7 @@ async def test_proc_mesh_with_lambda_env(self) -> None:
             "CUDA_DEVICE_MAX_CONNECTIONS": "1",
         }
 
-        def setup_cuda_env(_: MonarchContext) -> None:
+        def setup_cuda_env() -> None:
             for name, value in cuda_env_vars.items():
                 os.environ[name] = value