Skip to content

Commit 6632002

Browse files
Merge pull request #6 from nathan-weinberg/skeleton
Initial skeleton for Evaluator classes and exceptions
2 parents a770a86 + 20d2fc4 commit 6632002

File tree

5 files changed

+173
-0
lines changed

5 files changed

+173
-0
lines changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
11
# eval
22

3+
![Lint](https://github.com/instructlab/eval/actions/workflows/lint.yml/badge.svg?branch=main)
4+
![Build](https://github.com/instructlab/eval/actions/workflows/pypi.yaml/badge.svg?branch=main)
5+
![Release](https://img.shields.io/github/v/release/instructlab/eval)
6+
![License](https://img.shields.io/github/license/instructlab/eval)
7+
38
Python library for Evaluation

src/instructlab/eval/evaluator.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
3+
4+
class Evaluator:
    """
    Parent class for Evaluators

    Attributes:
        model_path  Path to the model to be evaluated
    """

    def __init__(self, model_path: str) -> None:
        # Stored verbatim; no validation or path resolution happens here.
        self.model_path = model_path

src/instructlab/eval/exceptions.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
3+
4+
class EvalError(Exception):
    """
    Parent class for all of instructlab-eval exceptions
    """


class ModelNotFoundError(EvalError):
    """
    Exception raised when model is not able to be found

    Attributes:
        message     error message to be printed on raise
        model       model that is being operated on (last component of path)
        path        filepath of model location, exactly as passed in
    """

    def __init__(self, path) -> None:
        self.path = path
        # Strip trailing separators first so "models/foo/" still yields "foo"
        # rather than an empty name; str() lets path-like objects through.
        self.model = str(path).rstrip("/").rsplit("/", 1)[-1]
        self.message = f"Model {self.model} could not be found at {self.path}"
        # Hand the message to Exception so str(err) and tracebacks display it.
        super().__init__(self.message)

src/instructlab/eval/mmlu.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
3+
# Local
4+
from .evaluator import Evaluator
5+
6+
7+
class MMLU_Evaluator(Evaluator):
    """
    Child class of an Evaluator for Massive Multitask Language Understanding (MMLU)

    Attributes:
        tasks       list of tasks for MMLU to test the model with
        few_shots   number of examples
        batch_size  number of GPUs
    """

    # NOTE(review): batch_size is described as "number of GPUs" by the original
    # author; nothing in this class uses it yet, so confirm the intended meaning.

    def __init__(
        self,
        model_path: str,
        tasks: list[str],
        few_shots: int = 2,
        batch_size: int = 5,
    ) -> None:
        # The parent constructor receives model_path.
        super().__init__(model_path)
        self.tasks = tasks
        self.few_shots = few_shots
        self.batch_size = batch_size

    def run(self) -> tuple[float, dict[str, float]]:
        """
        Runs MMLU evaluation

        Skeleton implementation: no evaluation is performed yet, so the
        placeholder values (0.0 and an empty dict) are always returned.

        Returns:
            overall_score       MMLU score for the overall model evaluation
            individual_scores   Individual MMLU score for each task
        """
        individual_scores: dict[str, float] = {}
        overall_score: float = 0.0
        return overall_score, individual_scores
36+
37+
38+
class PR_MMLU_Evaluator(Evaluator):
    """
    Child class of an Evaluator for PR Massive Multitask Language Understanding (PR MMLU)

    Attributes:
        sdg_path    path where all the PR MMLU tasks are stored
        task        group name that is shared by all the PR MMLU tasks
        few_shots   number of examples
        batch_size  number of GPUs
    """

    # NOTE(review): batch_size is described as "number of GPUs" by the original
    # author; nothing in this class uses it yet, so confirm the intended meaning.

    def __init__(
        self,
        model_path: str,
        sdg_path: str,
        task: str = "mmlu_pr",
        few_shots: int = 2,
        batch_size: int = 5,
    ) -> None:
        # The parent constructor receives model_path.
        super().__init__(model_path)
        self.sdg_path = sdg_path
        self.task = task
        self.few_shots = few_shots
        self.batch_size = batch_size

    def run(self) -> tuple[float, dict[str, float], list[tuple]]:
        """
        Runs PR MMLU evaluation

        Skeleton implementation: no evaluation is performed yet, so the
        placeholder values (0.0, an empty dict, an empty list) are always
        returned.

        Returns:
            overall_score       PR MMLU score for the overall model evaluation
            individual_scores   Individual PR MMLU scores for each task
            qa_pairs            Question and answer pairs from the evaluation
        """
        individual_scores: dict[str, float] = {}
        overall_score: float = 0.0
        qa_pairs: list[tuple] = []
        return overall_score, individual_scores, qa_pairs

src/instructlab/eval/mtbench.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
3+
# Local
4+
from .evaluator import Evaluator
5+
6+
7+
class MT_Bench_Evaluator(Evaluator):
    """
    Child class of an Evaluator for Multi-turn Benchmark (MT-Bench)

    Attributes:
        server_url  vLLM server endpoint
    """

    def __init__(self, model_path: str, server_url: str) -> None:
        # The parent constructor receives model_path.
        super().__init__(model_path)
        self.server_url = server_url

    def run(self) -> tuple[float, list[tuple]]:
        """
        Runs MT-Bench evaluation

        Skeleton implementation: no evaluation is performed yet, so the
        placeholder values (0.0 and an empty list) are always returned.

        Returns:
            overall_score   MT-Bench score for the overall model evaluation
            qa_pairs        Question and answer pairs from the evaluation
        """
        overall_score: float = 0.0
        qa_pairs: list[tuple] = []
        return overall_score, qa_pairs
30+
31+
32+
class PR_Bench_Evaluator(Evaluator):
    """
    Child class of an Evaluator for PR-Bench Benchmark (PR-Bench)

    Attributes:
        server_url  vLLM server endpoint
        questions   questions to be asked
    """

    def __init__(self, model_path: str, server_url: str, questions: str) -> None:
        # The parent constructor receives model_path.
        super().__init__(model_path)
        self.server_url = server_url
        self.questions = questions

    def run(self) -> tuple[float, list[tuple]]:
        """
        Runs PR-Bench evaluation

        Skeleton implementation: no evaluation is performed yet, so the
        placeholder values (0.0 and an empty list) are always returned.

        Returns:
            overall_score   PR-Bench score for the overall model evaluation
            qa_pairs        Question and answer pairs from the evaluation
        """
        overall_score: float = 0.0
        qa_pairs: list[tuple] = []
        return overall_score, qa_pairs

0 commit comments

Comments
 (0)