Commit 7c653cb

docs(user-guide): document use_optimized_save and use_cached_ckpt_structure (#72)
1 parent dcb28da commit 7c653cb

File tree

1 file changed: 6 additions, 2 deletions

docs/user-guide.md

Lines changed: 6 additions & 2 deletions
````diff
@@ -92,6 +92,8 @@ auto_resume = wrap_trainer_and_auto_resume_with_mlflashpoint(
     # always_save_context=False,  # Optional, defaults to False
     # write_thread_count=1,  # Optional, defaults to 1
     # initial_write_buffer_size_bytes=DESIRED_NUM_BYTES,  # Optional, defaults to 16 GB
+    # use_optimized_save=True,  # Optional, defaults to True. Uses the optimized save method to reduce write time.
+    # use_cached_ckpt_structure=True,  # Optional, defaults to False. Caches the checkpoint structure after identifying 2 consecutive save plan structures that are equal.
 )
 ```
 
@@ -148,6 +150,7 @@ memory_storage_writer = MemoryStorageWriter(...)
 # Use it to instantiate the Save Strategy
 megatron_save_strategy = MLFlashpointMegatronAsyncSaveStrategy(
     storage_writer=memory_storage_writer,
+    # use_cached_ckpt_structure=True,  # Optional, defaults to False. Caches the checkpoint structure after identifying 2 consecutive save plan structures that are equal.
 )
 ```
 
@@ -167,7 +170,7 @@ async_request = save_local_aware_megatron_checkpoint(
 
 !!! note
 
-Make sure to specify the checkpoint ID/path when saving based on the current step using:
+    Make sure to specify the checkpoint ID/path when saving based on the current step using:
     `CheckpointContainerId.create_child(base_container, CheckpointContainerId.format_version_container(current_step))`
     where `base_container` is the base path CheckpointContainerId used for all checkpoints for the current job, e.g. `"/tmp/mlf-checkpoints/job123"`.
 
@@ -229,7 +232,7 @@ Code: See the [`ml_flashpoint.adapter.pytorch`](https://github.com/google/ml-fla
 To use directly with PyTorch DCP, use the provided `StorageWriter` and `StorageReader` implementations.
 You can use whatever `Planner` implementations work for your use case, or resort to the defaults.
 
-If your per-rank checkpoint data exceeds the default buffer size (16 GB as of this writing), you can increase it using the optional `initial_buffer_size_bytes` parameter.
+If your per-rank checkpoint data exceeds the default buffer size (16 GB as of this writing), you can increase it using the optional `initial_buffer_size_bytes` parameter.
 
 #### Imports
 ```python
@@ -262,6 +265,7 @@ memory_storage_writer = MemoryStorageWriter(
         ckpt_obj_manager=checkpoint_object_manager,
         replication_manager=replication_manager,
         # initial_buffer_size_bytes=initial_write_buffer_size_bytes,  # Optional - increase for larger checkpoint sizes per rank
+        # use_optimized_save=True,  # Optional, defaults to True. Uses the optimized save method to reduce write time.
     ),
     mp_manager=torch_mp.Manager(),
 )
````

0 commit comments
