fix(strategies): correct device_mesh type hint in FSDP strategies (#21581)

littlebullGit · web-flow · commit 2816d8c86111 · 2026-03-17T07:04:26.000+01:00
diff --git a/src/lightning/fabric/CHANGELOG.md b/src/lightning/fabric/CHANGELOG.md
@@ -24,7 +24,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Fixed
 
--
+- Fixed `device_mesh` type hint in `FSDPStrategy` to accept a 2-element tuple via the CLI ([#21581](https://github.com/Lightning-AI/pytorch-lightning/pull/21581))
 
 ---
 
diff --git a/src/lightning/fabric/strategies/fsdp.py b/src/lightning/fabric/strategies/fsdp.py
@@ -150,7 +150,7 @@ def __init__(
         activation_checkpointing_policy: Optional["_POLICY"] = None,
         sharding_strategy: "_SHARDING_STRATEGY" = "FULL_SHARD",
         state_dict_type: Literal["full", "sharded"] = "sharded",
-        device_mesh: Optional[Union[tuple[int], "DeviceMesh"]] = None,
+        device_mesh: Optional[Union[tuple[int, int], "DeviceMesh"]] = None,
         **kwargs: Any,
     ) -> None:
         super().__init__(
diff --git a/src/lightning/pytorch/CHANGELOG.md b/src/lightning/pytorch/CHANGELOG.md
@@ -27,7 +27,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Fixed `val_check_interval` raising `ValueError` when `limit_val_batches=0` and interval exceeds training batches ([#21560](https://github.com/Lightning-AI/pytorch-lightning/pull/21560))
 
--
+- Fixed `device_mesh` type hint in `FSDPStrategy` to accept a 2-element tuple via the CLI ([#21581](https://github.com/Lightning-AI/pytorch-lightning/pull/21581))
 
 - Fixed ``RichModelSummary`` model size display formatting ([#21467](https://github.com/Lightning-AI/pytorch-lightning/pull/21467))
 
diff --git a/src/lightning/pytorch/strategies/fsdp.py b/src/lightning/pytorch/strategies/fsdp.py
@@ -160,7 +160,7 @@ def __init__(
         activation_checkpointing_policy: Optional["_POLICY"] = None,
         sharding_strategy: "_SHARDING_STRATEGY" = "FULL_SHARD",
         state_dict_type: Literal["full", "sharded"] = "full",
-        device_mesh: Optional[Union[tuple[int], "DeviceMesh"]] = None,
+        device_mesh: Optional[Union[tuple[int, int], "DeviceMesh"]] = None,
         **kwargs: Any,
     ) -> None:
         super().__init__(
diff --git a/tests/tests_fabric/strategies/test_fsdp.py b/tests/tests_fabric/strategies/test_fsdp.py
@@ -402,3 +402,15 @@ def test_get_full_state_dict_context_offload(set_type_mock, monkeypatch):
     with _get_full_state_dict_context(module=Mock(spec=FullyShardedDataParallel), world_size=4):
         assert set_type_mock.call_args_list[0][0][2].offload_to_cpu  # model config
         assert set_type_mock.call_args_list[0][0][3].offload_to_cpu  # optim config
+
+
+def test_device_mesh_type_annotation():
+    """Test that ``device_mesh`` type hint accepts a 2-element tuple via jsonargparse (#21580)."""
+    jsonargparse = pytest.importorskip("jsonargparse")  # optional dependency: skip when it is not installed
+    from inspect import signature
+
+    annot = signature(FSDPStrategy).parameters["device_mesh"].annotation  # hint read from the call signature
+    parser = jsonargparse.ArgumentParser()
+    parser.add_argument("--device_mesh", type=annot)  # let jsonargparse validate against the real annotation
+    args = parser.parse_args(["--device_mesh=[1, 4]"])  # two ints given in list syntax
+    assert args.device_mesh == (1, 4)  # must compare equal to a tuple; a plain list would not
diff --git a/tests/tests_pytorch/strategies/test_fsdp.py b/tests/tests_pytorch/strategies/test_fsdp.py
@@ -966,3 +966,15 @@ def configure_optimizers(self):
         max_steps=4,
     )
     trainer.fit(model, ckpt_path=checkpoint_path_full)
+
+
+def test_device_mesh_type_annotation():
+    """Test that ``device_mesh`` type hint accepts a 2-element tuple via jsonargparse (#21580)."""
+    jsonargparse = pytest.importorskip("jsonargparse")  # optional dependency: skip when it is not installed
+    from inspect import signature
+
+    annot = signature(FSDPStrategy).parameters["device_mesh"].annotation  # hint read from the call signature
+    parser = jsonargparse.ArgumentParser()
+    parser.add_argument("--device_mesh", type=annot)  # let jsonargparse validate against the real annotation
+    args = parser.parse_args(["--device_mesh=[1, 4]"])  # two ints given in list syntax
+    assert args.device_mesh == (1, 4)  # must compare equal to a tuple; a plain list would not