Skip to content

Commit d8fe0d5

Browse files
committed
adds mmlu-pro
1 parent 3cd31fd commit d8fe0d5

File tree

1 file changed

+77
-0
lines changed

1 file changed

+77
-0
lines changed
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
"""
2+
name:
3+
MMLU Pro
4+
5+
dataset:
6+
TIGER-Lab/MMLU-Pro
7+
8+
abstract:
9+
10+
languages:
11+
english
12+
13+
tags:
14+
general-knowledge
15+
16+
paper:
17+
18+
"""
19+
from string import ascii_uppercase
20+
21+
from lighteval.metrics.dynamic_metrics import (
22+
LogLikelihoodAccMetric,
23+
)
24+
from lighteval.metrics.metrics import Metrics
25+
from lighteval.metrics.normalizations import LogProbCharNorm, LogProbPMINorm, LogProbTokenNorm
26+
from lighteval.tasks.lighteval_task import LightevalTaskConfig
27+
from lighteval.tasks.multilingual.utils.task_utils import get_metrics_for_formulation
28+
from lighteval.tasks.requests import Doc
29+
from lighteval.tasks.templates.multichoice import get_mcq_prompt_function
30+
from lighteval.tasks.templates.utils.formulation import (
31+
CFFormulation,
32+
HybridFormulation,
33+
MCFFormulation,
34+
)
35+
from lighteval.utils.language import Language
36+
37+
38+
TEMPLATE = """
39+
Answer the following multiple choice question. The last line of your response should be of the following format: 'Answer: $LETTER' (without quotes) where LETTER is one of ABCD. Think step by step before answering.
40+
41+
{question}
42+
43+
{choices}
44+
45+
Answer:""".strip()
46+
47+
48+
def mmlu_pro_prompt_function(line, task_name: str = None):
    """Build a lighteval `Doc` from one MMLU-Pro dataset row.

    Args:
        line: A dataset row; reads the `question`, `options` (list of answer
            strings, up to 10) and `answer_index` (0-based gold index) fields.
        task_name: Name of the task this doc belongs to, stored on the `Doc`.

    Returns:
        A `Doc` whose `query` is the fully formatted prompt and whose
        `choices` are the option letters ("A", "B", ...), one per option.
    """
    options = line["options"]
    # One "LETTER: text" line per option, lettered A, B, C, ...
    formatted_choices = "\n".join(
        f"{letter}: {choice}" for letter, choice in zip(ascii_uppercase, options)
    )

    query = TEMPLATE.format(
        question=line["question"],
        choices=formatted_choices,
    )

    return Doc(
        task_name=task_name,
        query=query,
        # Bug fix: the original sliced by len() of the formatted choices
        # *string* (its character count, almost always >= 26), which returned
        # the whole alphabet. Slice by the number of options instead.
        choices=ascii_uppercase[: len(options)],
        gold_index=line["answer_index"],
        instruction=query,
    )
63+
64+
65+
# Task configuration for MMLU-Pro (TIGER-Lab/MMLU-Pro).
mmlu_pro = LightevalTaskConfig(
    name="mmlu_pro",
    prompt_function=mmlu_pro_prompt_function,
    suite=("lighteval",),
    hf_repo="TIGER-Lab/MMLU-Pro",
    hf_subset="default",
    # Pinned dataset revision so results are reproducible across dataset updates.
    hf_revision="3373e0b32277875b8db2aa555a333b78a08477ea",
    evaluation_splits=("test",),
    few_shots_split="validation",
    # Generative letter-extraction metric (same one used by the GPQA instruct
    # task), matching the "Answer: $LETTER" format requested by TEMPLATE.
    metrics=[Metrics.gpqa_instruct_metric],
)
76+
77+
# Entry point read by lighteval's task registry: all tasks this module exposes.
TASKS_TABLE = [mmlu_pro]

0 commit comments

Comments
 (0)