add method for creating collection for benchmark (#4115)

Samoed · web-flow · commit 5f126916ba31 · 2026-02-20T19:02:27.000+03:00
* add method for pushing

* simplify

* add comment to description

* simplify imports

* customize collection name
diff --git a/mteb/benchmarks/benchmark.py b/mteb/benchmarks/benchmark.py
@@ -2,14 +2,16 @@
 
 from collections.abc import Iterator, Sequence
 from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Literal
+from typing import TYPE_CHECKING, Literal, cast
 
+import huggingface_hub
 import pandas as pd
 
 from mteb.abstasks.abstask import AbsTask
 from mteb.types import StrURL
 
 if TYPE_CHECKING:
+    from mteb.abstasks.aggregated_task import AbsTaskAggregate
     from mteb.results import BenchmarkResults
 
 
@@ -112,6 +114,58 @@ def _create_per_language_table(
             )
             return no_results_frame
 
+    def push_collection_to_hub(
+        self,
+        hf_username: str,
+        collection_name: str | None = None,
+    ) -> None:
+        """Create or update a Hugging Face Hub collection with this benchmark's datasets.
+
+        Args:
+            hf_username: Hugging Face username or organization name; used to look up existing collections and as the namespace for a new one.
+            collection_name: Title of the collection on Hugging Face Hub; an existing collection with the same title is reused. If not provided, the benchmark name will be used.
+        """
+        collections = huggingface_hub.list_collections(owner=hf_username)
+        collection_name = collection_name or self.name
+        existing_collection = None
+        for collection in collections:
+            if collection.title == collection_name:
+                existing_collection = collection
+                break
+
+        if existing_collection is None:
+            description = self.description
+            if description and len(description) > 150:
+                description = description[:147] + "..."  # 147 characters + "..." == 150
+            collection = huggingface_hub.create_collection(
+                title=collection_name,
+                namespace=hf_username,
+                # hf collections have a 150 character limit for description, which is why it was truncated above
+                description=description if description else None,
+            )
+        else:
+            # list_collections returns each collection with only 4 items, so fetch the full collection by slug
+            collection = huggingface_hub.get_collection(
+                collection_slug=existing_collection.slug
+            )
+
+        existing_items = {item.item_id for item in collection.items}  # ids already in the collection
+
+        for task in self.tasks:
+            tasks = (  # an aggregate task contributes the datasets of its sub-tasks
+                cast("AbsTaskAggregate", task).tasks if task.is_aggregate else [task]
+            )
+            for benchmark_task in tasks:
+                task_path = benchmark_task.metadata.dataset["path"]
+                if task_path in existing_items:
+                    continue
+                huggingface_hub.add_collection_item(
+                    collection_slug=collection.slug,
+                    item_id=task_path,
+                    item_type="dataset",
+                )
+                existing_items.add(task_path)  # so a dataset shared by several tasks is added only once
+
 
 class RtebBenchmark(Benchmark):
     """Wrapper for RTEB benchmark."""
diff --git a/mteb/benchmarks/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks/benchmarks.py
@@ -2915,13 +2915,13 @@
     reference="https://huggingface.co/collections/openbmb/visrag",
     citation=r"""
 @misc{yu2025visragvisionbasedretrievalaugmentedgeneration,
-      title={VisRAG: Vision-based Retrieval-augmented Generation on Multi-modality Documents}, 
-      author={Shi Yu and Chaoyue Tang and Bokai Xu and Junbo Cui and Junhao Ran and Yukun Yan and Zhenghao Liu and Shuo Wang and Xu Han and Zhiyuan Liu and Maosong Sun},
-      year={2025},
-      eprint={2410.10594},
-      archivePrefix={arXiv},
-      primaryClass={cs.IR},
-      url={https://arxiv.org/abs/2410.10594}, 
+  archiveprefix = {arXiv},
+  author = {Shi Yu and Chaoyue Tang and Bokai Xu and Junbo Cui and Junhao Ran and Yukun Yan and Zhenghao Liu and Shuo Wang and Xu Han and Zhiyuan Liu and Maosong Sun},
+  eprint = {2410.10594},
+  primaryclass = {cs.IR},
+  title = {VisRAG: Vision-based Retrieval-augmented Generation on Multi-modality Documents},
+  url = {https://arxiv.org/abs/2410.10594},
+  year = {2025},
 }
 """,
 )
diff --git a/mteb/tasks/retrieval/eng/lotte_retrieval.py b/mteb/tasks/retrieval/eng/lotte_retrieval.py
@@ -25,7 +25,7 @@ class LoTTERetrieval(AbsTaskRetrieval):
         reference="https://github.com/stanford-futuredata/ColBERT/blob/main/LoTTE.md",
         eval_splits=["test", "dev"],
         eval_langs={domain: ["eng-Latn"] for domain in HF_SUBSETS},
-        main_score="precision_at_5",
+        main_score="recall_at_5",
         date=("2021-12-02", "2022-06-10"),
         domains=["Academic", "Web", "Social"],
         task_subtypes=["Article retrieval"],