diff --git a/pyproject.toml b/pyproject.toml index abd1897f8..4558c950f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,7 +59,7 @@ dependencies = [ "huggingface_hub[hf_xet]>=0.30.2", "torch>=2.0,<3.0", "GitPython>=3.1.41", # for logging - "datasets>=3.5.0", + "datasets>=3.5.0,<4.0.0", "pydantic", "numpy<2", # pinned to avoid incompatibilities # Prettiness diff --git a/src/lighteval/tasks/prompt_manager.py b/src/lighteval/tasks/prompt_manager.py index 6b7068bd8..602260fec 100644 --- a/src/lighteval/tasks/prompt_manager.py +++ b/src/lighteval/tasks/prompt_manager.py @@ -107,6 +107,10 @@ def _prepare_chat_template(self, doc: Doc, tokenize: bool = True) -> str: messages.append({"role": "user", "content": query}) messages.append({"role": "assistant", "content": fewshot_sample.get_golds()[0]}) + # If there are any additional messages to include, let's do it + if doc.additional_messages: + messages += doc.additional_messages + # Add main query main_query = self._extract_query(doc.query, doc.instruction) diff --git a/src/lighteval/tasks/requests.py b/src/lighteval/tasks/requests.py index 8829510b2..cd7c53d8b 100644 --- a/src/lighteval/tasks/requests.py +++ b/src/lighteval/tasks/requests.py @@ -202,11 +202,12 @@ class Doc: images: list["Image"] | None = None # for multimodal benchmarks specific: dict | None = None # Information which is specific to the current eval - # Uncoditioned query is used for PMI normalization, that's # log P(choice | Query) - log P(choice | Unconditioned Query) - # The uncoditioned query shouldn't contain any information about the task, thus usually it's empty string or 'Answer:'. + # Unconditioned query is used for PMI normalization, that's + # log P(choice | Query) - log P(choice | Unconditioned Query) + # The unconditioned query shouldn't contain any information about the task, thus usually it's empty string or 'Answer:'. 
unconditioned_query: str | None = None original_query: str | None = None # the query before preprocessing, if stored + additional_messages: list[object] | None = None id: str = "" task_name: str = ""