huggingface · mfuntowicz · Jul 9, 2025 · Jul 10, 2025 · Jul 11, 2025 · Aug 22, 2025
diff --git a/pyproject.toml b/pyproject.toml
@@ -59,7 +59,7 @@ dependencies = [
     "huggingface_hub[hf_xet]>=0.30.2",
     "torch>=2.0,<3.0",
     "GitPython>=3.1.41", # for logging
-    "datasets>=3.5.0",
+    "datasets>=3.5.0,<4.0.0",
     "pydantic",
     "numpy<2",  # pinned to avoid incompatibilities
     # Prettiness

diff --git a/src/lighteval/tasks/prompt_manager.py b/src/lighteval/tasks/prompt_manager.py
@@ -107,6 +107,10 @@ def _prepare_chat_template(self, doc: Doc, tokenize: bool = True) -> str:
             messages.append({"role": "user", "content": query})
             messages.append({"role": "assistant", "content": fewshot_sample.get_golds()[0]})
 
+        # Append any additional messages attached to the doc, ahead of the main query
+        if doc.additional_messages:
+            messages += doc.additional_messages
+
         # Add main query
         main_query = self._extract_query(doc.query, doc.instruction)
 

diff --git a/src/lighteval/tasks/requests.py b/src/lighteval/tasks/requests.py
@@ -202,11 +202,12 @@ class Doc:
     images: list["Image"] | None = None  # for multimodal benchmarks
     specific: dict | None = None  # Information which is specific to the current eval
 
-    # Uncoditioned query is used for PMI normalization, that's
+    # Unconditioned query is used for PMI normalization, that's
     # log P(choice | Query) - log P(choice | Unconditioned Query)
-    # The uncoditioned query shouldn't contain any information about the task, thus usually it's empty string or 'Answer:'.
+    # The unconditioned query shouldn't contain any information about the task, thus usually it's empty string or 'Answer:'.
     unconditioned_query: str | None = None
     original_query: str | None = None  # the query before preprocessing, if stored
+    additional_messages: list[object] | None = None
 
     id: str = ""
     task_name: str = ""