33
33
)
34
34
from lighteval .tasks .default_prompts import LETTER_INDICES
35
35
from lighteval .tasks .lighteval_task import LightevalTaskConfig
36
+ from lighteval .tasks .multilingual .tasks import LangCodeLanguage , iso_639_3_ind_to_iso_639_3_macro
36
37
from lighteval .tasks .requests import Doc
37
38
from lighteval .utils .language import Language
38
39
48
49
}
49
50
50
51
52
def belebele_prompt_en_instruct(line, task_name: str = None):
    """Build a belebele prompt using the English instruction template.

    Overrides the sample's dialect so that ``belebele_prompt`` selects the
    ``eng_Latn`` template (English instructions) while the passage, question,
    and answer choices remain in the sample's original language.

    Args:
        line: A belebele dataset row (mutable mapping with a "dialect" key).
        task_name: Name of the task, forwarded to ``belebele_prompt``.

    Returns:
        The Doc produced by ``belebele_prompt`` for the modified row.
    """
    # BUG FIX: the original used `==` (a no-op comparison whose result was
    # discarded), so the English template was never actually selected.
    # Assignment performs the intended dialect override.
    line["dialect"] = "eng_Latn"
    return belebele_prompt(line, task_name)
51
57
def belebele_prompt (line , task_name : str = None ):
52
58
lang_to_template = {
53
59
"eng_Latn" : "Given the following passage, query, and answer choices, output the letter corresponding to the correct answer. The last line of your response should be of the following format: 'Answer: $LETTER' (without quotes) where LETTER is one of A, B, C, or D. Think step by step before answering.\n \n ###\n Passage:\n {Passage}\n ###\n Query:\n {Question}\n ###\n Choices:\n A) {A}\n B) {B}\n C) {C}\n D) {D}" ,
@@ -60,7 +66,7 @@ def belebele_prompt(line, task_name: str = None):
60
66
61
67
gold_index = int (line ["correct_answer_num" ]) - 1
62
68
choices = [line ["mc_answer1" ], line ["mc_answer2" ], line ["mc_answer3" ], line ["mc_answer4" ]]
63
- query_template = lang_to_template . get ( line ["dialect" ], "eng_Latn" )
69
+ query_template = lang_to_template [ line ["dialect" ]]
64
70
query = query_template .format (
65
71
A = choices [0 ],
66
72
B = choices [1 ],
@@ -80,9 +86,9 @@ def belebele_prompt(line, task_name: str = None):
80
86
)
81
87
82
88
83
- BELEBELE_TASKS = [
89
+ BELEBELE_TASKS_NATIVE_INSTRUCT = [
84
90
LightevalTaskConfig (
85
- name = f"belebele_instruct_ { lang } _Latn" ,
91
+ name = f"belebele_native_instruct_ { lang } _Latn" ,
86
92
prompt_function = belebele_prompt ,
87
93
suite = ["extended" ],
88
94
hf_repo = "facebook/belebele" ,
@@ -123,7 +129,168 @@ def belebele_prompt(line, task_name: str = None):
123
129
"spa" ,
124
130
]
125
131
]
126
- TASKS_TABLE .extend (BELEBELE_TASKS )
132
+
133
# One English-instruction belebele task per target language: the passage,
# question, and choices stay in the target language, but the instruction
# template is the English one (see belebele_prompt_en_instruct).
BELEBELE_TASKS_EN_INSTRUCT = [
    LightevalTaskConfig(
        name=f"belebele_en_instruct_{lang}",
        prompt_function=belebele_prompt_en_instruct,
        suite=["extended"],
        hf_repo="facebook/belebele",
        # NOTE(review): `lang` entries already carry a script suffix (e.g.
        # "zho_Hans", "hin_Deva"), so this yields subsets like
        # "zho_Hans_Latn" — confirm against the facebook/belebele config
        # names before relying on non-Latin entries.
        hf_subset=f"{lang}_Latn",
        evaluation_splits=["test"],
        hf_avail_splits=["test"],
        few_shots_split=None,
        few_shots_select=None,
        generation_size=32768,  # needed for reasoning models like R1
        metric=[
            SampleLevelMetric(
                metric_name="pass@1:1_samples",
                sample_level_fn=PassAtK(
                    k=1,
                    n=1,
                    # BUG FIX: `lang=lang` binds the comprehension variable at
                    # definition time. Without it the lambda closes over `lang`
                    # with late binding, and since it only runs at evaluation
                    # time, every task would score with the LAST language in
                    # the list below.
                    sample_scoring_function=lambda pred, ref, doc, lang=lang: multilingual_extractive_match_metric(
                        language=iso_639_3_ind_to_iso_639_3_macro[LangCodeLanguage.get(lang).to_alpha3()],
                        gold_extraction_target=[IndicesExtractionConfig(prefix_for_extraction="NativeLetters")],
                        pred_extraction_target=[IndicesExtractionConfig(prefix_for_extraction="NativeLetters")],
                        precision=6,
                    ).sample_level_fn([ref], [pred], doc),
                ).compute,
                category=MetricCategory.GENERATIVE_SAMPLING,
                use_case=MetricUseCase.REASONING,
                corpus_level_fn=np.mean,
                higher_is_better=True,
            )
        ],
        stop_sequence=[],  # no stop sequence, will use eos token
        trust_dataset=True,
        version=1,
    )
    # Commented-out entries are languages whose ISO code cannot currently be
    # resolved by the extraction-language mapping above.
    for lang in [
        "acm_Arab",
        "arz_Arab",
        "ceb_Latn",
        "fin_Latn",
        "hin_Deva",
        "ita_Latn",
        "khm_Khmr",
        "lvs_Latn",
        "npi_Deva",
        "pol_Latn",
        "slv_Latn",
        "swe_Latn",
        # "tso_Latn",
        # "xho_Latn",
        "afr_Latn",
        "asm_Beng",
        "ces_Latn",
        "fra_Latn",
        "hin_Latn",
        "jav_Latn",
        # "kin_Latn",
        "mal_Mlym",
        "npi_Latn",
        "por_Latn",
        # "sna_Latn",
        "swh_Latn",
        "tur_Latn",
        "yor_Latn",
        "als_Latn",
        "azj_Latn",
        "ckb_Arab",
        # "fuv_Latn",
        "hrv_Latn",
        "jpn_Jpan",
        "kir_Cyrl",
        "mar_Deva",
        # "nso_Latn",
        "snd_Arab",
        "tam_Taml",
        "ukr_Cyrl",
        "zho_Hans",
        "amh_Ethi",
        # "bam_Latn",
        "dan_Latn",
        # "gaz_Latn",
        "hun_Latn",
        # "kac_Latn",
        "kor_Hang",
        "mkd_Cyrl",
        # "nya_Latn",
        "ron_Latn",
        "som_Latn",
        "tel_Telu",
        "urd_Arab",
        "zho_Hant",
        "apc_Arab",
        "ben_Beng",
        "deu_Latn",
        # "grn_Latn",
        "hye_Armn",
        "kan_Knda",
        "lao_Laoo",
        "mlt_Latn",
        "ory_Orya",
        "rus_Cyrl",
        # "sot_Latn",
        "tgk_Cyrl",
        "urd_Latn",
        "zsm_Latn",
        "arb_Arab",
        "ben_Latn",
        "ell_Grek",
        "guj_Gujr",
        # "ibo_Latn",
        "kat_Geor",
        # "lin_Latn",
        # "mri_Latn",
        "pan_Guru",
        # "shn_Mymr",
        "spa_Latn",
        "tgl_Latn",
        "uzn_Latn",
        # "zul_Latn",
        "arb_Latn",
        # "bod_Tibt",
        "eng_Latn",
        # "hat_Latn",
        # "ilo_Latn",
        "kaz_Cyrl",
        "lit_Latn",
        "mya_Mymr",
        "pbt_Arab",
        "sin_Latn",
        "srp_Cyrl",
        "tha_Thai",
        "vie_Latn",
        "ars_Arab",
        "bul_Cyrl",
        "est_Latn",
        # "hau_Latn",
        "ind_Latn",
        # "kea_Latn",
        # "lug_Latn",
        "nld_Latn",
        "pes_Arab",
        "sin_Sinh",
        # "ssw_Latn",
        # "tir_Ethi",
        "war_Latn",
        "ary_Arab",
        "cat_Latn",
        "eus_Latn",
        "heb_Hebr",
        "isl_Latn",
        # "khk_Cyrl",
        # "luo_Latn",
        "nob_Latn",
        "plt_Latn",
        "slk_Latn",
        # "sun_Latn",
        # "tsn_Latn",
        # "wol_Latn",
    ]
]
293
+ TASKS_TABLE .extend (BELEBELE_TASKS_NATIVE_INSTRUCT + BELEBELE_TASKS_EN_INSTRUCT )
127
294
128
295
129
296
class GlobalMMLUPrompt :
0 commit comments