Skip to content

Commit 70dd22b

Browse files
committed
Merge remote-tracking branch 'upstream/master' into docs/12119_emphasize-lines
2 parents 05a6ef7 + c05cadb commit 70dd22b

File tree

8 files changed

+122
-3
lines changed

8 files changed

+122
-3
lines changed

.github/markdown-links-config.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@
88
},
99
{
1010
"pattern": "^https://codecov.io/gh/Lightning-AI/pytorch-lightning/graph/badge.svg"
11+
},
12+
{
13+
"pattern": "^https://app.neptune.ai"
14+
},
15+
{
16+
"pattern": "^https://www.neptune.ai/"
1117
}
1218
],
1319
"httpHeaders": [

docs/source-pytorch/common/optimization.rst

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,63 @@ Should you still require the flexibility of calling ``.zero_grad()``, ``.backwar
5757
always switch to :ref:`manual optimization <manual_optimization>`.
5858
Manual optimization is required if you wish to work with multiple optimizers.
5959

60+
.. _lr_scheduling:
61+
62+
Learning Rate Scheduling
63+
========================
64+
65+
Lightning supports learning rate schedulers configured via :meth:`~lightning.pytorch.core.LightningModule.configure_optimizers`.
66+
In **automatic optimization**, Lightning will call ``scheduler.step()`` for you automatically —
67+
you do not need to call it manually.
68+
69+
A simple example returning both an optimizer and a scheduler:
70+
71+
.. code-block:: python
72+
73+
def configure_optimizers(self):
74+
optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
75+
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1)
76+
return {
77+
"optimizer": optimizer,
78+
"lr_scheduler": {
79+
"scheduler": scheduler,
80+
"interval": "epoch", # "epoch" (default) or "step"
81+
"frequency": 1, # how often to call scheduler.step(); default is 1
82+
},
83+
}
84+
85+
The ``interval`` and ``frequency`` keys control when ``scheduler.step()`` is called:
86+
87+
.. list-table::
88+
:header-rows: 1
89+
:widths: 15 15 70
90+
91+
* - ``interval``
92+
- ``frequency``
93+
- Behavior
94+
* - ``"epoch"`` (default)
95+
- 1 (default)
96+
- ``scheduler.step()`` is called once at the end of every epoch
97+
* - ``"epoch"``
98+
- N
99+
- ``scheduler.step()`` is called at the end of every N epochs
100+
* - ``"step"``
101+
- 1 (default)
102+
- ``scheduler.step()`` is called after every training batch (step)
103+
* - ``"step"``
104+
- N
105+
- ``scheduler.step()`` is called after every N training steps
106+
107+
.. note::
108+
If ``interval`` and ``frequency`` are not specified, Lightning defaults to
109+
``interval="epoch"`` and ``frequency=1``, stepping the scheduler once per epoch.
110+
111+
.. note::
112+
If you are using **manual optimization**, Lightning will **not** call ``scheduler.step()``
113+
automatically. You are responsible for stepping the scheduler yourself inside
114+
``training_step()`` or ``on_train_epoch_end()`` at the appropriate point.
115+
116+
For the full list of supported return formats, see :meth:`~lightning.pytorch.core.LightningModule.configure_optimizers`.
60117

61118
.. _gradient_accumulation:
62119

docs/source-pytorch/conf.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -639,4 +639,6 @@ def package_list_from_file(file):
639639
"https://openai.com/index/*",
640640
"https://tinyurl.com/.*", # has a human verification check on redirect
641641
"https://docs.neptune.ai/.*", # TODO: remove after dropping Neptune support
642+
"https://app.neptune.ai/*",
643+
"https://www.neptune.ai/*"
642644
]

src/lightning/pytorch/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
2525

2626
### Fixed
2727

28+
- Fixed `val_check_interval` raising `ValueError` when `limit_val_batches=0` and interval exceeds training batches ([#21560](https://github.com/Lightning-AI/pytorch-lightning/pull/21560))
29+
2830
-
2931

32+
- Fixed ``RichModelSummary`` model size display formatting ([#21467](https://github.com/Lightning-AI/pytorch-lightning/pull/21467))
3033

3134
---
3235

src/lightning/pytorch/callbacks/rich_model_summary.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
from lightning.pytorch.callbacks import ModelSummary
1919
from lightning.pytorch.utilities.imports import _RICH_AVAILABLE
20-
from lightning.pytorch.utilities.model_summary import get_human_readable_count
20+
from lightning.pytorch.utilities.model_summary import get_formatted_model_size, get_human_readable_count
2121

2222

2323
class RichModelSummary(ModelSummary):
@@ -105,8 +105,9 @@ def summarize(
105105
console.print(table)
106106

107107
parameters = []
108-
for param in [trainable_parameters, total_parameters - trainable_parameters, total_parameters, model_size]:
108+
for param in [trainable_parameters, total_parameters - trainable_parameters, total_parameters]:
109109
parameters.append("{:<{}}".format(get_human_readable_count(int(param)), 10))
110+
parameters.append("{:<{}}".format(get_formatted_model_size(model_size), 10))
110111

111112
grid = Table.grid(expand=True)
112113
grid.add_column()

src/lightning/pytorch/loops/fit_loop.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,11 @@ def setup_data(self) -> None:
292292
trainer._last_val_time = trainer._train_start_time
293293
elif isinstance(trainer.val_check_interval, int):
294294
trainer.val_check_batch = trainer.val_check_interval
295-
if trainer.val_check_batch > self.max_batches and trainer.check_val_every_n_epoch is not None:
295+
if (
296+
trainer.val_check_batch > self.max_batches
297+
and trainer.check_val_every_n_epoch is not None
298+
and trainer.limit_val_batches > 0
299+
):
296300
raise ValueError(
297301
f" `val_check_interval` ({trainer.val_check_interval}) must be less than or equal"
298302
f" to the number of the training batches ({self.max_batches})."

tests/tests_pytorch/callbacks/test_rich_model_summary.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,34 @@ def example_input_array(self) -> Any:
7070
# assert that the input summary data was converted correctly
7171
args, _ = mock_table_add_row.call_args_list[0]
7272
assert args[1:] == ("0", "layer", "Linear", "66 ", "train", "512 ", "[4, 32]", "[4, 2]")
73+
74+
75+
@RunIf(rich=True)
76+
def test_rich_summary_model_size_formatting():
77+
"""Ensure model_size uses get_formatted_model_size, not get_human_readable_count."""
78+
from io import StringIO
79+
80+
from rich.console import Console
81+
82+
model_summary = RichModelSummary()
83+
model = BoringModel()
84+
summary = summarize(model)
85+
summary_data = summary._get_summary_data()
86+
87+
output = StringIO()
88+
console = Console(file=output, force_terminal=True)
89+
90+
with mock.patch("rich.get_console", return_value=console):
91+
model_summary.summarize(
92+
summary_data=summary_data,
93+
total_parameters=1,
94+
trainable_parameters=1,
95+
model_size=5500.0,
96+
total_training_modes=summary.total_training_modes,
97+
total_flops=1,
98+
)
99+
100+
result = output.getvalue()
101+
# model_size=5500.0 should display as "5,500.000" (formatted), not "5.5 K" (human readable count)
102+
assert "5,500.000" in result
103+
assert "5.5 K" not in result

tests/tests_pytorch/loops/test_training_loop.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,3 +330,18 @@ def on_train_batch_start(self, batch, batch_idx):
330330
assert len(trainer.lr_scheduler_configs) == 1
331331
assert all(a == adjusted_lr[0] for a in adjusted_lr)
332332
assert init_lr * 0.1**max_epochs == adjusted_lr[0]
333+
334+
335+
@pytest.mark.parametrize("limit_val_batches", [0, 0.0])
336+
def test_val_check_interval_with_limit_val_batches_zero(tmp_path, limit_val_batches):
337+
"""Test that val_check_interval > num training batches does not raise when limit_val_batches=0."""
338+
model = BoringModel()
339+
trainer = Trainer(
340+
default_root_dir=tmp_path,
341+
max_epochs=1,
342+
limit_train_batches=5,
343+
val_check_interval=10, # greater than limit_train_batches
344+
limit_val_batches=limit_val_batches, # validation disabled
345+
)
346+
# Should not raise ValueError
347+
trainer.fit(model)

0 commit comments

Comments (0)