Skip to content

Commit cd47eaa

Browse files
committed
push up evaluation script
Signed-off-by: Oleg Silkin <[email protected]>
1 parent ce38464 commit cd47eaa

File tree

1 file changed

+81
-0
lines changed

1 file changed

+81
-0
lines changed
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#!/usr/bin/env python3
2+
3+
"""
4+
Example usage:
5+
python scripts/evaluate_best_checkpoint.py \
6+
/path/to/checkpoint_dir \
7+
--output-file /path/to/output_file
8+
"""
9+
10+
import json
11+
import typer
12+
from pathlib import Path
13+
from typing import Optional
14+
15+
16+
# Typer application object; commands are registered on it via @app.command()
# and it is invoked as the CLI entry point when the script is run directly.
app = typer.Typer()
17+
18+
19+
@app.command()
def main(
    input_dir: Path = typer.Argument(..., help="Input directory to process"),
    output_file: Optional[Path] = typer.Option(None, help="Optional output file path"),
):
    """
    Evaluate each checkpoint under INPUT_DIR and report the best-scoring one.

    Checkpoints are discovered with the glob ``hf_format/samples_*`` relative
    to INPUT_DIR. Each one is run through ``LeaderboardV2Evaluator``, the
    results are printed from highest to lowest ``score``, and, when
    --output-file is given, all results are written to that file as JSON.

    Exits with status 1 when INPUT_DIR does not exist, is not a directory,
    or contains no matching checkpoints.
    """
    # Diagnostics go to stderr so they do not pollute captured/piped stdout.
    if not input_dir.exists():
        typer.echo(f"Error: Input directory '{input_dir}' does not exist", err=True)
        raise typer.Exit(1)

    if not input_dir.is_dir():
        typer.echo(f"Error: '{input_dir}' is not a directory", err=True)
        raise typer.Exit(1)

    # glob() yields entries in an unspecified order; sorting makes the
    # processing order (and the winner among tied scores) deterministic.
    checkpoint_dirs = sorted(input_dir.glob("hf_format/samples_*"))
    typer.echo(f"Found {len(checkpoint_dirs)} samples files")

    if not checkpoint_dirs:
        typer.echo(
            f"No checkpoint directories found in the input directory: {input_dir}",
            err=True,
        )
        raise typer.Exit(1)

    # Imported lazily: the import is slow, so it is deferred until the
    # arguments have been validated and there is actual work to do.
    typer.echo("importing LeaderboardV2Evaluator, this may take a while...")
    from instructlab.eval.leaderboard import LeaderboardV2Evaluator

    checkpoint_results = {}
    for checkpoint in checkpoint_dirs:
        typer.echo(f"Processing checkpoint: {checkpoint}")
        # Per-checkpoint results path handed to the evaluator.
        ckpt_output_file = checkpoint / "leaderboard_results.json"
        evaluator = LeaderboardV2Evaluator(
            model_path=str(checkpoint), output_file=ckpt_output_file
        )
        result = evaluator.run()
        checkpoint_results[checkpoint.name] = result
        typer.echo(f"Checkpoint {checkpoint.name} results: {result['score']}")

    # Rank checkpoints from highest to lowest score.
    sorted_checkpoints = sorted(
        checkpoint_results.items(), key=lambda item: item[1]["score"], reverse=True
    )
    typer.echo("Sorted checkpoints by score:")
    for checkpoint_name, result in sorted_checkpoints:
        typer.echo(f"{'=' * 100}")
        # Label each dump so the results can be attributed to a checkpoint.
        typer.echo(f"Checkpoint: {checkpoint_name}")
        typer.echo(json.dumps(result, indent=2))

    typer.echo(f"{'=' * 100}")
    # checkpoint_dirs is non-empty here, so there is always a first entry.
    best_checkpoint_name = sorted_checkpoints[0][0]
    typer.echo(f"Best checkpoint: {best_checkpoint_name}")

    if output_file:
        typer.echo(f"Output will be saved to: {output_file}")
        # Create missing parent directories so a long evaluation run is not
        # lost to a failed write at the very end.
        output_file.parent.mkdir(parents=True, exist_ok=True)
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(checkpoint_results, f, indent=2)

    typer.echo("Processing complete!")
78+
79+
80+
# Script entry point: hand control to the Typer app only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    app()

0 commit comments

Comments
 (0)