Skip to content

Commit 70dd22b

Browse files
committed
Merge remote-tracking branch 'upstream/master' into docs/12119_emphasize-lines
2 parents 05a6ef7 + c05cadb commit 70dd22b

File tree

8 files changed

+122
-3
lines changed

8 files changed

+122
-3
lines changed

.github/markdown-links-config.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@
88
},
99
{
1010
"pattern": "^https://codecov.io/gh/Lightning-AI/pytorch-lightning/graph/badge.svg"
11+
},
12+
{
13+
"pattern": "^https://app.neptune.ai"
14+
},
15+
{
16+
"pattern": "^https://www.neptune.ai/"
1117
}
1218
],
1319
"httpHeaders": [

docs/source-pytorch/common/optimization.rst

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,63 @@ Should you still require the flexibility of calling ``.zero_grad()``, ``.backwar
5757
always switch to :ref:`manual optimization <manual_optimization>`.
5858
Manual optimization is required if you wish to work with multiple optimizers.
5959

60+
.. _lr_scheduling:
61+
62+
Learning Rate Scheduling
63+
========================
64+
65+
Lightning supports learning rate schedulers configured via :meth:`~lightning.pytorch.core.LightningModule.configure_optimizers`.
66+
In **automatic optimization**, Lightning will call ``scheduler.step()`` for you automatically —
67+
you do not need to call it manually.
68+
69+
A simple example returning both an optimizer and a scheduler:
70+
71+
.. code-block:: python
72+
73+
def configure_optimizers(self):
74+
optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
75+
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1)
76+
return {
77+
"optimizer": optimizer,
78+
"lr_scheduler": {
79+
"scheduler": scheduler,
80+
"interval": "epoch", # "epoch" (default) or "step"
81+
"frequency": 1, # how often to call scheduler.step(); default is 1
82+
},
83+
}
84+
85+
The ``interval`` and ``frequency`` keys control when ``scheduler.step()`` is called:
86+
87+
.. list-table::
88+
:header-rows: 1
89+
:widths: 15 15 70
90+
91+
* - ``interval``
92+
- ``frequency``
93+
- Behavior
94+
* - ``"epoch"`` (default)
95+
- 1 (default)
96+
- ``scheduler.step()`` is called once at the end of every epoch
97+
* - ``"epoch"``
98+
- N
99+
- ``scheduler.step()`` is called at the end of every N epochs
100+
* - ``"step"``
101+
- 1 (default)
102+
- ``scheduler.step()`` is called after every training batch (step)
103+
* - ``"step"``
104+
- N
105+
- ``scheduler.step()`` is called after every N training steps
106+
107+
.. note::
108+
If ``interval`` and ``frequency`` are not specified, Lightning defaults to
109+
``interval="epoch"`` and ``frequency=1``, stepping the scheduler once per epoch.
110+
111+
.. note::
112+
If you are using **manual optimization**, Lightning will **not** call ``scheduler.step()``
113+
automatically. You are responsible for stepping the scheduler yourself inside
114+
``training_step()`` or ``on_train_epoch_end()`` at the appropriate point.
115+
116+
For the full list of supported return formats, see :meth:`~lightning.pytorch.core.LightningModule.configure_optimizers`.
60117

61118
.. _gradient_accumulation:
62119

docs/source-pytorch/conf.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -639,4 +639,6 @@ def package_list_from_file(file):
639639
"https://openai.com/index/*",
640640
"https://tinyurl.com/.*", # has a human verification check on redirect
641641
"https://docs.neptune.ai/.*", # TODO: remove after dropping Neptune support
642+
"https://app.neptune.ai/*",
643+
"https://www.neptune.ai/*"
642644
]

src/lightning/pytorch/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
2525

2626
### Fixed
2727

28+
- Fixed `val_check_interval` raising `ValueError` when `limit_val_batches=0` and interval exceeds training batches ([#21560](https://github.com/Lightning-AI/pytorch-lightning/pull/21560))
29+
2830
-
2931

32+
- Fixed ``RichModelSummary`` model size display formatting ([#21467](https://github.com/Lightning-AI/pytorch-lightning/pull/21467))
3033

3134
---
3235

src/lightning/pytorch/callbacks/rich_model_summary.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
from lightning.pytorch.callbacks import ModelSummary
1919
from lightning.pytorch.utilities.imports import _RICH_AVAILABLE
20-
from lightning.pytorch.utilities.model_summary import get_human_readable_count
20+
from lightning.pytorch.utilities.model_summary import get_formatted_model_size, get_human_readable_count
2121

2222

2323
class RichModelSummary(ModelSummary):
@@ -105,8 +105,9 @@ def summarize(
105105
console.print(table)
106106

107107
parameters = []
108-
for param in [trainable_parameters, total_parameters - trainable_parameters, total_parameters, model_size]:
108+
for param in [trainable_parameters, total_parameters - trainable_parameters, total_parameters]:
109109
parameters.append("{:<{}}".format(get_human_readable_count(int(param)), 10))
110+
parameters.append("{:<{}}".format(get_formatted_model_size(model_size), 10))
110111

111112
grid = Table.grid(expand=True)
112113
grid.add_column()

src/lightning/pytorch/loops/fit_loop.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,11 @@ def setup_data(self) -> None:
292292
trainer._last_val_time = trainer._train_start_time
293293
elif isinstance(trainer.val_check_interval, int):
294294
trainer.val_check_batch = trainer.val_check_interval
295-
if trainer.val_check_batch > self.max_batches and trainer.check_val_every_n_epoch is not None:
295+
if (
296+
trainer.val_check_batch > self.max_batches
297+
and trainer.check_val_every_n_epoch is not None
298+
and trainer.limit_val_batches > 0
299+
):
296300
raise ValueError(
297301
f" `val_check_interval` ({trainer.val_check_interval}) must be less than or equal"
298302
f" to the number of the training batches ({self.max_batches})."

tests/tests_pytorch/callbacks/test_rich_model_summary.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,34 @@ def example_input_array(self) -> Any:
7070
# assert that the input summary data was converted correctly
7171
args, _ = mock_table_add_row.call_args_list[0]
7272
assert args[1:] == ("0", "layer", "Linear", "66 ", "train", "512 ", "[4, 32]", "[4, 2]")
73+
74+
75+
@RunIf(rich=True)
76+
def test_rich_summary_model_size_formatting():
77+
"""Ensure model_size uses get_formatted_model_size, not get_human_readable_count."""
78+
from io import StringIO
79+
80+
from rich.console import Console
81+
82+
model_summary = RichModelSummary()
83+
model = BoringModel()
84+
summary = summarize(model)
85+
summary_data = summary._get_summary_data()
86+
87+
output = StringIO()
88+
console = Console(file=output, force_terminal=True)
89+
90+
with mock.patch("rich.get_console", return_value=console):
91+
model_summary.summarize(
92+
summary_data=summary_data,
93+
total_parameters=1,
94+
trainable_parameters=1,
95+
model_size=5500.0,
96+
total_training_modes=summary.total_training_modes,
97+
total_flops=1,
98+
)
99+
100+
result = output.getvalue()
101+
# model_size=5500.0 should display as "5,500.000" (formatted), not "5.5 K" (human readable count)
102+
assert "5,500.000" in result
103+
assert "5.5 K" not in result

tests/tests_pytorch/loops/test_training_loop.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,3 +330,18 @@ def on_train_batch_start(self, batch, batch_idx):
330330
assert len(trainer.lr_scheduler_configs) == 1
331331
assert all(a == adjusted_lr[0] for a in adjusted_lr)
332332
assert init_lr * 0.1**max_epochs == adjusted_lr[0]
333+
334+
335+
@pytest.mark.parametrize("limit_val_batches", [0, 0.0])
336+
def test_val_check_interval_with_limit_val_batches_zero(tmp_path, limit_val_batches):
337+
"""Test that val_check_interval > num training batches does not raise when limit_val_batches=0."""
338+
model = BoringModel()
339+
trainer = Trainer(
340+
default_root_dir=tmp_path,
341+
max_epochs=1,
342+
limit_train_batches=5,
343+
val_check_interval=10, # greater than limit_train_batches
344+
limit_val_batches=limit_val_batches, # validation disabled
345+
)
346+
# Should not raise ValueError
347+
trainer.fit(model)

0 commit comments

Comments (0)