Commit 44ba438

Hangzhi and okhat authored

Fix BootstrapFinetune example in index doc and add basic tests for bootstrap_finetune. (#8435)

* done
* done
* done
* format
* done
* Revert specific files to commit c396217
* fix set_lm
* fix
* fix testing
* lint
* link the tutorial to index sessions for clarification
* resolve comments
* format

Co-authored-by: Omar Khattab <[email protected]>

1 parent f875dc5 · commit 44ba438

File tree

4 files changed: +99 −4 lines changed

* docs/docs/index.md
* docs/docs/learn/optimization/optimizers.md
* dspy/teleprompt/bootstrap_finetune.py
* tests/teleprompt/test_bootstrap_finetune.py

docs/docs/index.md

Lines changed: 4 additions & 1 deletion

@@ -403,17 +403,20 @@ Given a few tens or hundreds of representative _inputs_ of your task and a _metr
 
 ```python linenums="1"
 import dspy
-dspy.configure(lm=dspy.LM("openai/gpt-4o-mini-2024-07-18"))
+lm=dspy.LM('openai/gpt-4o-mini-2024-07-18')
 
 # Define the DSPy module for classification. It will use the hint at training time, if available.
 signature = dspy.Signature("text, hint -> label").with_updated_fields("label", type_=Literal[tuple(CLASSES)])
 classify = dspy.ChainOfThought(signature)
+classify.set_lm(lm)
 
 # Optimize via BootstrapFinetune.
 optimizer = dspy.BootstrapFinetune(metric=(lambda x, y, trace=None: x.label == y.label), num_threads=24)
 optimized = optimizer.compile(classify, trainset=trainset)
 
 optimized(text="What does a pending cash withdrawal mean?")
+
+# For a complete fine-tuning tutorial, see: https://dspy.ai/tutorials/classification_finetuning/
 ```
 
 **Possible Output (from the last line):**
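Why the doc change was needed: `dspy.configure(lm=...)` sets a process-wide default that predictors fall back to at inference time, but it does not populate each predictor's own `lm` attribute, which `BootstrapFinetune` reads when grouping finetuning jobs. Below is a self-contained sketch of the corrected pattern; `CLASSES` and `trainset` are defined earlier on the actual docs page, so the values here are placeholders.

```python
from typing import Literal

import dspy

CLASSES = ["pending_cash_withdrawal", "card_arrival", "other"]  # placeholder label set
trainset = [  # placeholder training data; the real page builds this from a dataset
    dspy.Example(text="Why hasn't my cash withdrawal gone through yet?",
                 hint="pending", label="pending_cash_withdrawal").with_inputs("text", "hint"),
]

lm = dspy.LM('openai/gpt-4o-mini-2024-07-18')

signature = dspy.Signature("text, hint -> label").with_updated_fields("label", type_=Literal[tuple(CLASSES)])
classify = dspy.ChainOfThought(signature)
classify.set_lm(lm)  # assigns the LM to every predictor, so pred.lm is no longer None

optimizer = dspy.BootstrapFinetune(metric=(lambda x, y, trace=None: x.label == y.label), num_threads=24)
# optimized = optimizer.compile(classify, trainset=trainset)  # kicks off an actual finetuning job
```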

docs/docs/learn/optimization/optimizers.md

Lines changed: 7 additions & 3 deletions

@@ -58,17 +58,18 @@ These optimizers produce optimal instructions for the prompt and, in the case of
 
 6. [**`MIPROv2`**](../../api/optimizers/MIPROv2.md): Generates instructions *and* few-shot examples in each step. The instruction generation is data-aware and demonstration-aware. Uses Bayesian Optimization to effectively search over the space of generation instructions/demonstrations across your modules.
 
+7. [**`SIMBA`**](../../api/optimizers/SIMBA.md)
 
 ### Automatic Finetuning
 
 This optimizer is used to fine-tune the underlying LLM(s).
 
-7. [**`BootstrapFinetune`**](../../api/optimizers/BootstrapFinetune.md): Distills a prompt-based DSPy program into weight updates. The output is a DSPy program that has the same steps, but where each step is conducted by a finetuned model instead of a prompted LM.
+8. [**`BootstrapFinetune`**](/api/optimizers/BootstrapFinetune): Distills a prompt-based DSPy program into weight updates. The output is a DSPy program that has the same steps, but where each step is conducted by a finetuned model instead of a prompted LM. [See the classification fine-tuning tutorial](https://dspy.ai/tutorials/classification_finetuning/) for a complete example.
 
 
 ### Program Transformations
 
-8. [**`Ensemble`**](../../api/optimizers/Ensemble.md): Ensembles a set of DSPy programs and either uses the full set or randomly samples a subset into a single program.
+9. [**`Ensemble`**](../../api/optimizers/Ensemble.md): Ensembles a set of DSPy programs and either uses the full set or randomly samples a subset into a single program.
 
 
 ## Which optimizer should I use?

@@ -176,17 +177,20 @@ optimized_program = teleprompter.compile(YOUR_PROGRAM_HERE, trainset=YOUR_TRAINS
 
 ```python linenums="1"
 import dspy
-dspy.configure(lm=dspy.LM('openai/gpt-4o-mini-2024-07-18'))
+lm=dspy.LM('openai/gpt-4o-mini-2024-07-18')
 
 # Define the DSPy module for classification. It will use the hint at training time, if available.
 signature = dspy.Signature("text, hint -> label").with_updated_fields('label', type_=Literal[tuple(CLASSES)])
 classify = dspy.ChainOfThought(signature)
+classify.set_lm(lm)
 
 # Optimize via BootstrapFinetune.
 optimizer = dspy.BootstrapFinetune(metric=(lambda x, y, trace=None: x.label == y.label), num_threads=24)
 optimized = optimizer.compile(classify, trainset=trainset)
 
 optimized(text="What does a pending cash withdrawal mean?")
+
+# For a complete fine-tuning tutorial, see: https://dspy.ai/tutorials/classification_finetuning/
 ```
 
 **Possible Output (from the last line):**
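As an aside, the renumbered `Ensemble` entry is the one optimizer above without an inline example. A minimal sketch of its usage, assuming `programs` is a list of already-compiled DSPy programs for the same task and that `dspy.majority` fits their output field:

```python
import dspy
from dspy.teleprompt import Ensemble

# `programs` is assumed to be a list of compiled DSPy programs for the same task.
teleprompter = Ensemble(reduce_fn=dspy.majority, size=2)  # vote over a random subset of 2
ensembled_program = teleprompter.compile(programs)
# prediction = ensembled_program(text="What does a pending cash withdrawal mean?")
```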

dspy/teleprompt/bootstrap_finetune.py

Lines changed: 7 additions & 0 deletions

@@ -81,7 +81,14 @@ def compile(
         key_to_data = {}
         for pred_ind, pred in enumerate(student.predictors()):
             data_pred_ind = None if self.multitask else pred_ind
+            if pred.lm is None:
+                raise ValueError(
+                    f"Predictor {pred_ind} does not have an LM assigned. "
+                    f"Please ensure the module's predictors have their LM set before fine-tuning. "
+                    f"You can set it using: your_module.set_lm(your_lm)"
+                )
             training_key = (pred.lm, data_pred_ind)
+
             if training_key not in key_to_data:
                 train_data, data_format = self._prepare_finetune_data(
                     trace_data=trace_data, lm=pred.lm, pred_ind=data_pred_ind
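One detail worth noting in this hunk: `training_key = (pred.lm, data_pred_ind)` is what groups predictors into finetuning jobs. With `multitask=True` (the default), `data_pred_ind` is `None`, so all predictors sharing an LM collapse into a single job, which is also why the new test below mocks `finetune_lms` to return `{(lm, None): lm}`. A toy sketch of that grouping, with strings standing in for LM objects:

```python
# Toy illustration of the (lm, data_pred_ind) keying used above.
multitask = True
predictor_lms = ["lm_a", "lm_a", "lm_b"]  # three predictors, two distinct LMs

keys = {(lm, None if multitask else i) for i, lm in enumerate(predictor_lms)}
print(sorted(keys))  # [('lm_a', None), ('lm_b', None)] -> two jobs, not three
```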
tests/teleprompt/test_bootstrap_finetune.py

Lines changed: 81 additions & 0 deletions

@@ -0,0 +1,81 @@
+from unittest.mock import patch
+
+import dspy
+from dspy import Example
+from dspy.predict import Predict
+from dspy.teleprompt import BootstrapFinetune
+from dspy.utils.dummies import DummyLM
+
+
+# Define a simple metric function for testing
+def simple_metric(example, prediction, trace=None):
+    return example.output == prediction.output
+
+
+examples = [
+    Example(input="What is the color of the sky?", output="blue").with_inputs("input"),
+    Example(input="What does the fox say?", output="Ring-ding-ding-ding-dingeringeding!").with_inputs("input"),
+]
+trainset = [examples[0]]
+
+
+def test_bootstrap_finetune_initialization():
+    """Test BootstrapFinetune initialization with various parameters."""
+    bootstrap = BootstrapFinetune(metric=simple_metric)
+    assert bootstrap.metric == simple_metric, "Metric not correctly initialized"
+    assert bootstrap.multitask == True, "Multitask should default to True"
+
+
+class SimpleModule(dspy.Module):
+    def __init__(self, signature):
+        super().__init__()
+        self.predictor = Predict(signature)
+
+    def forward(self, **kwargs):
+        return self.predictor(**kwargs)
+
+
+def test_compile_with_predict_instances():
+    """Test BootstrapFinetune compilation with Predict instances."""
+    # Create SimpleModule instances for student and teacher
+    student = SimpleModule("input -> output")
+    teacher = SimpleModule("input -> output")
+
+    lm = DummyLM([{"output": "blue"}, {"output": "Ring-ding-ding-ding-dingeringeding!"}])
+    dspy.settings.configure(lm=lm)
+
+    # Set LM for both student and teacher
+    student.set_lm(lm)
+    teacher.set_lm(lm)
+
+    bootstrap = BootstrapFinetune(metric=simple_metric)
+
+    # Mock the fine-tuning process since DummyLM doesn't support it
+    with patch.object(bootstrap, "finetune_lms") as mock_finetune:
+        mock_finetune.return_value = {(lm, None): lm}
+        compiled_student = bootstrap.compile(student, teacher=teacher, trainset=trainset)
+
+        assert compiled_student is not None, "Failed to compile student"
+        assert hasattr(compiled_student, "_compiled") and compiled_student._compiled, "Student compilation flag not set"
+
+        mock_finetune.assert_called_once()
+
+
+def test_error_handling_missing_lm():
+    """Test error handling when predictor doesn't have an LM assigned."""
+
+    lm = DummyLM([{"output": "test"}])
+    dspy.settings.configure(lm=lm)
+
+    student = SimpleModule("input -> output")
+    # Intentionally NOT setting LM for the student module
+
+    bootstrap = BootstrapFinetune(metric=simple_metric)
+
+    # This should raise ValueError about missing LM and hint to use set_lm
+    try:
+        bootstrap.compile(student, trainset=trainset)
+        assert False, "Should have raised ValueError for missing LM"
+    except ValueError as e:
+        assert "does not have an LM assigned" in str(e)
+        assert "set_lm" in str(e)
