|
2 | 2 |
|
3 | 3 | from collections.abc import Iterator, Sequence |
4 | 4 | from dataclasses import dataclass, field |
5 | | -from typing import TYPE_CHECKING, Literal |
| 5 | +from typing import TYPE_CHECKING, Literal, cast |
6 | 6 |
|
| 7 | +import huggingface_hub |
7 | 8 | import pandas as pd |
8 | 9 |
|
9 | 10 | from mteb.abstasks.abstask import AbsTask |
10 | 11 | from mteb.types import StrURL |
11 | 12 |
|
12 | 13 | if TYPE_CHECKING: |
| 14 | + from mteb.abstasks.aggregated_task import AbsTaskAggregate |
13 | 15 | from mteb.results import BenchmarkResults |
14 | 16 |
|
15 | 17 |
|
@@ -112,6 +114,58 @@ def _create_per_language_table( |
112 | 114 | ) |
113 | 115 | return no_results_frame |
114 | 116 |
|
def push_collection_to_hub(
    self,
    hf_username: str,
    collection_name: str | None = None,
) -> None:
    """Push the benchmark's datasets to a Hugging Face Hub collection.

    Looks for a collection owned by ``hf_username`` whose title equals
    ``collection_name``; creates it if none is found. Every dataset used by
    the benchmark's tasks (including the sub-tasks of aggregate tasks) is
    then added to the collection unless it is already present as a dataset
    item.

    Errors raised by ``huggingface_hub`` are not caught here and propagate
    to the caller.

    Args:
        hf_username: Hugging Face username or organization name
        collection_name: Name for the collection on Hugging Face Hub. If not provided, the benchmark name will be used.
    """
    collection_name = collection_name or self.name

    # Collections are matched by title; the first one with a matching title wins.
    existing_collection = next(
        (
            candidate
            for candidate in huggingface_hub.list_collections(owner=hf_username)
            if candidate.title == collection_name
        ),
        None,
    )

    if existing_collection is None:
        description = self.description
        # hf collections have a 150 character limit for description, so we
        # truncate it if it's too long
        if description and len(description) > 150:
            description = description[:147] + "..."
        collection = huggingface_hub.create_collection(
            title=collection_name,
            namespace=hf_username,
            description=description if description else None,
        )
    else:
        # Collections returned by list_collections only carry a truncated
        # list of items, so fetch the full collection before de-duplicating.
        collection = huggingface_hub.get_collection(
            collection_slug=existing_collection.slug
        )

    # Only dataset items count as "already present": a model, space or paper
    # that happens to share a repo id must not cause the dataset to be skipped.
    existing_items = {
        item.item_id for item in collection.items if item.item_type == "dataset"
    }

    for task in self.tasks:
        # Aggregate tasks contribute the datasets of their sub-tasks.
        benchmark_tasks = (
            cast("AbsTaskAggregate", task).tasks if task.is_aggregate else [task]
        )
        for benchmark_task in benchmark_tasks:
            task_path = benchmark_task.metadata.dataset["path"]
            if task_path in existing_items:
                continue
            huggingface_hub.add_collection_item(
                collection_slug=collection.slug,
                item_id=task_path,
                item_type="dataset",
            )
            # Remember the dataset so tasks sharing it do not add it twice.
            existing_items.add(task_path)
| 168 | + |
115 | 169 |
|
116 | 170 | class RtebBenchmark(Benchmark): |
117 | 171 | """Wrapper for RTEB benchmark.""" |
|
0 commit comments