Commit 9d747c2

divide out tensor engine tests (#649)
Summary: Pull Request resolved: #649

Main changes:
- Pulls out tests that require GPUs into a `tensor_engine` folder
- Changes the `test_cpu.py` step of OSS to run all non-tensor-engine tests
- Changes the `test_gpu.py` step of OSS to run all tensor engine tests
- Changes wheels.yml to run `test_python_actors` as a minimal set of tests to validate wheel correctness

Differential Revision: D78989844
1 parent 43569d1 commit 9d747c2

17 files changed: +27 -19 lines changed
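
For reference, the split described above can be reproduced locally with the same pytest invocations the updated workflows use. The sketch below is illustrative only: it assumes pytest and pytest-xdist (which provides the -n 4 option) are installed, and the wrapper function names are made up for the example.

import sys

import pytest


def run_cpu_suite() -> int:
    # Mirrors the new test-cpu.yml step: everything except the tensor_engine folder.
    return pytest.main(
        [
            "python/tests/",
            "-s",
            "-v",
            "-m",
            "not oss_skip",
            "--ignore=python/tests/tensor_engine",
            "-n",
            "4",
        ]
    )


def run_tensor_engine_suite() -> int:
    # Mirrors the new test-cuda.yml step: only the GPU-backed tensor_engine tests.
    return pytest.main(
        ["python/tests/tensor_engine", "-s", "-v", "-m", "not oss_skip", "-n", "4"]
    )


if __name__ == "__main__":
    sys.exit(run_cpu_suite())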

.github/workflows/build-cpu.yml

Lines changed: 3 additions & 0 deletions
@@ -29,5 +29,8 @@ jobs:
 # Setup build environment (conda + system deps + rust + build deps)
 setup_build_environment

+# Build the process allocator binary
+build_process_allocator
+
 # Build monarch (No tensor engine, CPU version)
 USE_TENSOR_ENGINE=0 python setup.py bdist_wheel

.github/workflows/test-cpu.yml

Lines changed: 7 additions & 5 deletions
@@ -26,15 +26,17 @@ jobs:
 source scripts/common-setup.sh

 # Setup test environment
-setup_conda_environment
+setup_test_environment
+
+# Install cargo binaries
+mkdir cargo_bin && mv ${RUNNER_ARTIFACT_DIR}/cargo_bin/* cargo_bin
+chmod +x cargo_bin/process_allocator
+export PATH=$(pwd)/cargo_bin:$PATH

 # Disable tensor engine
 export USE_TENSOR_ENGINE=0

 # Install the built wheel from artifact
 install_wheel_from_artifact

-# Currently a no-op.
-# Tests requiring tensor engine / GPU need to be identified and flagged to skip.
-# We will just ensure monarch can be imported successfully.
-python -c "import monarch; print('Monarch imported successfully')"
+LC_ALL=C pytest python/tests/ -s -v -m "not oss_skip" --ignore=python/tests/tensor_engine -n 4

.github/workflows/test-cuda.yml

Lines changed: 2 additions & 2 deletions
@@ -55,5 +55,5 @@ jobs:
 pyright python/tests/test_python_actors.py

 # Run CUDA tests
-LC_ALL=C pytest python/tests/ -s -v -m "not oss_skip"
-python python/tests/test_mock_cuda.py
+LC_ALL=C pytest python/tests/tensor_engine -s -v -m "not oss_skip" -n 4
+python python/tests/tensor_engine/test_mock_cuda.py

.github/workflows/wheels.yml

Lines changed: 3 additions & 3 deletions
@@ -35,12 +35,11 @@ jobs:
 script: |
   source scripts/common-setup.sh
   setup_build_environment ${{ matrix.python-version }}
+  cargo install --path monarch_hyperactor

   # Setup Tensor Engine dependencies
   setup_tensor_engine

-  cargo install --path monarch_hyperactor
-
   # Build wheel
   export MONARCH_PACKAGE_NAME="torchmonarch-nightly"
   export MONARCH_VERSION=$(date +'%Y.%m.%d')
@@ -54,7 +53,8 @@ jobs:
   # Run tests
   install_python_test_dependencies
   pip install dist/*.whl
-  python -c "import monarch"
+  LC_ALL=C pytest python/tests/test_python_actors.py -s -v -m "not oss_skip"
+
 publish:
   name: Publish to PyPI
   needs: build

python/monarch/_testing.py

Lines changed: 2 additions & 1 deletion
@@ -10,6 +10,7 @@
 import tempfile
 import time
 from contextlib import contextmanager, ExitStack
+from enum import Enum
 from typing import Any, Callable, Dict, Generator, Literal, Optional

 import monarch_supervisor
@@ -225,7 +226,7 @@ def exit(
     return dm


-class BackendType:
+class BackendType(Enum):
     PY = "py"
     RS = "rs"
     MESH = "mesh"
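
The BackendType change above is what drives the backend=str(backend_type) -> backend=backend_type.value updates (and the BackendType.MESH comparisons) in the test files below: once the class is an Enum, str() returns the member name rather than the underlying string, and members no longer compare equal to raw strings. A small standalone sketch, illustrative only and not the project's actual code:

from enum import Enum


# Before: a plain class whose attributes are the raw strings.
class OldBackendType:
    PY = "py"


# After: an Enum, as in the diff above.
class NewBackendType(Enum):
    PY = "py"


assert str(OldBackendType.PY) == "py"                 # attribute is just the string
assert str(NewBackendType.PY) == "NewBackendType.PY"  # Enum str() is Class.MEMBER
assert NewBackendType.PY.value == "py"                # .value recovers the raw string
assert NewBackendType.PY != "py"                      # Enum members don't equal plain strings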

python/tests/builtins/test_log.py

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ def local_device_mesh(cls, num_hosts, gpu_per_host, backend_type, activate=True)
             num_hosts,
             gpu_per_host,
             activate,
-            backend=str(backend_type),
+            backend=backend_type.value,
         )

     @patch("monarch.builtins.log.logger")

python/tests/builtins/test_random.py

Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@ def local_device_mesh(cls, num_hosts, gpu_per_host, backend_type, activate=True)
             num_hosts,
             gpu_per_host,
             activate,
-            backend=str(backend_type),
+            backend=backend_type.value,
         )

     def test_set_manual_seed_remote(self, backend_type):

python/tests/test_coalescing.py renamed to python/tests/tensor_engine/test_coalescing.py

Lines changed: 1 addition & 1 deletion
@@ -78,7 +78,7 @@ def local_device_mesh(
             num_hosts,
             gpu_per_host,
             activate,
-            backend=str(backend_type),
+            backend=backend_type.value,
         )

     @property

python/tests/test_controller.py renamed to python/tests/tensor_engine/test_controller.py

Lines changed: 6 additions & 4 deletions
@@ -96,7 +96,9 @@ def local_rust_device_mesh(
     torch.cuda.device_count() < 2,
     reason="Not enough GPUs, this test requires at least 2 GPUs",
 )
-@pytest.mark.parametrize("backend_type", [BackendType.PY, BackendType.RS, "mesh"])
+@pytest.mark.parametrize(
+    "backend_type", [BackendType.PY, BackendType.RS, BackendType.MESH]
+)
 # Set global timeout--sandcastle's timeout is 600s. A test that sandcastle times
 # out is not counted as a failure, so we set a more restrictive timeout to
 # ensure we see a hard failure in CI.
@@ -114,7 +116,7 @@ def local_device_mesh(
             N,
             gpu_per_host,
             activate,
-            backend=str(backend_type),
+            backend=backend_type.value,
         )

     def test_errors(self, backend_type):
@@ -176,7 +178,7 @@ def test_sub_mesh_use_only_one(self, backend_type):
         local_x = local_x.result(timeout=20)
         assert torch.equal(local_x, torch.ones(3, 4))

-    def test_sub_mesh_process_grop(self, backend_type):
+    def test_sub_mesh_process_group(self, backend_type):
         with self.local_device_mesh(2, 2, backend_type, activate=False) as device_mesh:
             h0 = device_mesh.slice(host=0)
             pg0 = h0.process_group(("gpu",))
@@ -603,7 +605,7 @@ def test_to_mesh_pytree(self, backend_type):
         assert torch.equal(moved_tensor_b, torch.tensor([2.0]))

     def test_hanging_error(self, backend_type):
-        if backend_type != "mesh":
+        if backend_type != BackendType.MESH:
             pytest.skip("only relevant for mesh backend")
         with self.local_device_mesh(2, 2, backend_type) as device_mesh:
             remote(lambda: torch.rand(3) + torch.rand(4), propagate=lambda: None)()
