Skip to content

Commit 5f12691

Browse files
authored
add method for creating collection for benchmark (#4115)
* add method for pushing * simplify * add comment to description * simplify imports * customize collection name
1 parent 820f11c commit 5f12691

File tree

3 files changed

+63
-9
lines changed

3 files changed

+63
-9
lines changed

mteb/benchmarks/benchmark.py

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,16 @@
22

33
from collections.abc import Iterator, Sequence
44
from dataclasses import dataclass, field
5-
from typing import TYPE_CHECKING, Literal
5+
from typing import TYPE_CHECKING, Literal, cast
66

7+
import huggingface_hub
78
import pandas as pd
89

910
from mteb.abstasks.abstask import AbsTask
1011
from mteb.types import StrURL
1112

1213
if TYPE_CHECKING:
14+
from mteb.abstasks.aggregated_task import AbsTaskAggregate
1315
from mteb.results import BenchmarkResults
1416

1517

@@ -112,6 +114,58 @@ def _create_per_language_table(
112114
)
113115
return no_results_frame
114116

117+
def push_collection_to_hub(
    self,
    hf_username: str,
    collection_name: str | None = None,
) -> None:
    """Create or extend a Hugging Face Hub collection with this benchmark's datasets.

    A collection titled ``collection_name`` is looked up under ``hf_username``.
    If none exists, a new one is created with the benchmark description
    (shortened to fit the Hub's description limit). Every dataset referenced
    by the benchmark's tasks, including the sub-tasks of aggregate tasks, is
    then added unless the collection already lists it.

    Args:
        hf_username: Hugging Face username or organization name that owns the collection.
        collection_name: Title of the collection on the Hub. Falls back to the
            benchmark name when omitted or empty.
    """
    owned_collections = huggingface_hub.list_collections(owner=hf_username)
    title = collection_name or self.name

    # First collection of this owner whose title matches, if any.
    match = next(
        (entry for entry in owned_collections if entry.title == title),
        None,
    )

    if match is not None:
        # Listing collections returns at most 4 items per collection, so
        # re-fetch the collection by slug to get its full item list.
        hub_collection = huggingface_hub.get_collection(collection_slug=match.slug)
    else:
        # HF collections have a 150 character limit for the description,
        # so it is truncated (with an ellipsis) when too long.
        summary = self.description
        if summary and len(summary) > 150:
            summary = summary[:147] + "..."
        hub_collection = huggingface_hub.create_collection(
            title=title,
            namespace=hf_username,
            description=summary or None,
        )

    # Track item ids already in the collection so each dataset is added once.
    known_ids = {entry.item_id for entry in hub_collection.items}

    for task in self.tasks:
        if task.is_aggregate:
            # Aggregate tasks contribute the datasets of their sub-tasks.
            members = cast("AbsTaskAggregate", task).tasks
        else:
            members = [task]

        for member in members:
            dataset_path = member.metadata.dataset["path"]
            if dataset_path not in known_ids:
                huggingface_hub.add_collection_item(
                    collection_slug=hub_collection.slug,
                    item_id=dataset_path,
                    item_type="dataset",
                )
                known_ids.add(dataset_path)
168+
115169

116170
class RtebBenchmark(Benchmark):
117171
"""Wrapper for RTEB benchmark."""

mteb/benchmarks/benchmarks/benchmarks.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2915,13 +2915,13 @@
29152915
reference="https://huggingface.co/collections/openbmb/visrag",
29162916
citation=r"""
29172917
@misc{yu2025visragvisionbasedretrievalaugmentedgeneration,
2918-
title={VisRAG: Vision-based Retrieval-augmented Generation on Multi-modality Documents},
2919-
author={Shi Yu and Chaoyue Tang and Bokai Xu and Junbo Cui and Junhao Ran and Yukun Yan and Zhenghao Liu and Shuo Wang and Xu Han and Zhiyuan Liu and Maosong Sun},
2920-
year={2025},
2921-
eprint={2410.10594},
2922-
archivePrefix={arXiv},
2923-
primaryClass={cs.IR},
2924-
url={https://arxiv.org/abs/2410.10594},
2918+
archiveprefix = {arXiv},
2919+
author = {Shi Yu and Chaoyue Tang and Bokai Xu and Junbo Cui and Junhao Ran and Yukun Yan and Zhenghao Liu and Shuo Wang and Xu Han and Zhiyuan Liu and Maosong Sun},
2920+
eprint = {2410.10594},
2921+
primaryclass = {cs.IR},
2922+
title = {VisRAG: Vision-based Retrieval-augmented Generation on Multi-modality Documents},
2923+
url = {https://arxiv.org/abs/2410.10594},
2924+
year = {2025},
29252925
}
29262926
""",
29272927
)

mteb/tasks/retrieval/eng/lotte_retrieval.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class LoTTERetrieval(AbsTaskRetrieval):
2525
reference="https://github.com/stanford-futuredata/ColBERT/blob/main/LoTTE.md",
2626
eval_splits=["test", "dev"],
2727
eval_langs={domain: ["eng-Latn"] for domain in HF_SUBSETS},
28-
main_score="precision_at_5",
28+
main_score="recall_at_5",
2929
date=("2021-12-02", "2022-06-10"),
3030
domains=["Academic", "Web", "Social"],
3131
task_subtypes=["Article retrieval"],

0 commit comments

Comments
 (0)