Skip to content

Commit 84da0d1

Browse files
authored
[wwb] Fix reranker tests (#2890)
## Description
- split wwb reranker tests
- skip reranker GenAI tests on macOS; a follow-up task has been created

## Checklist:
- [ ] Tests have been updated or added to cover the new code <!--- If the change isn't maintenance related, update the tests at https://github.com/openvinotoolkit/openvino.genai/tree/master/tests or explain in the description why the tests don't need an update. -->
- [ ] This patch fully addresses the ticket. <!--- If follow-up pull requests are needed, specify in description. -->
- [ ] I have made corresponding changes to the documentation
1 parent 15e20af commit 84da0d1

File tree

4 files changed

+72
-87
lines changed

4 files changed

+72
-87
lines changed

tools/who_what_benchmark/tests/test_cli_embeddings.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
)
1818
def test_embeddings_basic(model_id, model_type, tmp_path):
1919
GT_FILE = tmp_path / "gt.csv"
20-
MODEL_PATH = tmp_path / model_id.replace("/", "--")
20+
MODEL_PATH = tmp_path / model_id.replace("/", "_")
2121

2222
result = subprocess.run(["optimum-cli", "export",
2323
"openvino", "-m", model_id,

tools/who_what_benchmark/tests/test_cli_image.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def run_wwb(args):
3838

3939
def setup_module():
4040
for model_id in OV_IMAGE_MODELS:
41-
MODEL_PATH = os.path.join(MODEL_CACHE, model_id.replace("/", "--"))
41+
MODEL_PATH = os.path.join(MODEL_CACHE, model_id.replace("/", "_"))
4242
subprocess.run(["optimum-cli", "export", "openvino", "--model", model_id, MODEL_PATH], capture_output=True, text=True)
4343

4444

@@ -121,7 +121,7 @@ def test_image_model_genai(model_id, model_type, tmp_path):
121121
pytest.xfail("Ticket 173169")
122122

123123
GT_FILE = tmp_path / "gt.csv"
124-
MODEL_PATH = os.path.join(MODEL_CACHE, model_id.replace("/", "--"))
124+
MODEL_PATH = os.path.join(MODEL_CACHE, model_id.replace("/", "_"))
125125

126126
run_wwb([
127127
"--base-model",
Lines changed: 68 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,64 +1,94 @@
11
import subprocess # nosec B404
2+
import sys
23
import pytest
4+
import shutil
35
import logging
6+
import tempfile
47
from test_cli_image import run_wwb
8+
from pathlib import Path
59

610

711
logging.basicConfig(level=logging.INFO)
812
logger = logging.getLogger(__name__)
13+
tmp_dir = tempfile.mkdtemp()
914

1015

11-
@pytest.mark.parametrize(
12-
("model_id", "model_type"),
13-
[
14-
("cross-encoder/ms-marco-TinyBERT-L2-v2", "text-reranking"),
15-
],
16-
)
17-
def test_reranking_basic(model_id, model_type, tmp_path):
18-
GT_FILE = tmp_path / "gt.csv"
19-
MODEL_PATH = tmp_path / model_id.replace("/", "--")
20-
21-
result = subprocess.run(["optimum-cli", "export",
22-
"openvino", "-m", model_id,
23-
MODEL_PATH, "--task",
24-
"text-classification",
25-
"--trust-remote-code"],
26-
capture_output=True,
27-
text=True,
28-
)
29-
assert result.returncode == 0
16+
OV_RERANK_MODELS = {
17+
("cross-encoder/ms-marco-TinyBERT-L2-v2", "text-classification"),
18+
("Qwen/Qwen3-Reranker-0.6B", "text-generation"),
19+
}
3020

31-
# Collect reference with HF model
21+
22+
def setup_module():
23+
for model_info in OV_RERANK_MODELS:
24+
model_id = model_info[0]
25+
task = model_info[1]
26+
MODEL_PATH = Path(tmp_dir, model_id.replace("/", "_"))
27+
subprocess.run(["optimum-cli", "export", "openvino", "--model", model_id, MODEL_PATH, "--task", task, "--trust-remote-code"],
28+
capture_output=True,
29+
text=True)
30+
31+
32+
def teardown_module():
33+
logger.info("Remove models")
34+
shutil.rmtree(tmp_dir)
35+
36+
37+
@pytest.mark.parametrize(("model_info"), OV_RERANK_MODELS)
38+
def test_reranking_genai(model_info, tmp_path):
39+
if sys.platform == 'darwin':
40+
pytest.xfail("Ticket 175534")
41+
42+
GT_FILE = Path(tmp_dir) / "gt.csv"
43+
model_id = model_info[0]
44+
MODEL_PATH = Path(tmp_dir) / model_id.replace("/", "_")
45+
46+
# test GenAI
3247
run_wwb([
3348
"--base-model",
34-
model_id,
49+
MODEL_PATH,
3550
"--num-samples",
3651
"1",
3752
"--gt-data",
3853
GT_FILE,
3954
"--device",
4055
"CPU",
4156
"--model-type",
42-
model_type,
43-
"--hf",
57+
"text-reranking",
58+
"--genai"
4459
])
4560

46-
# test Optimum
61+
assert Path(tmp_dir, "reference").exists()
62+
63+
64+
@pytest.mark.parametrize(
65+
("model_info"), OV_RERANK_MODELS
66+
)
67+
def test_reranking_optimum(model_info, tmp_path):
68+
GT_FILE = Path(tmp_dir) / "gt.csv"
69+
model_id = model_info[0]
70+
MODEL_PATH = Path(tmp_dir, model_id.replace("/", "_"))
71+
72+
# Collect reference with HF model
4773
run_wwb([
48-
"--target-model",
49-
MODEL_PATH,
74+
"--base-model",
75+
model_id,
5076
"--num-samples",
5177
"1",
5278
"--gt-data",
5379
GT_FILE,
5480
"--device",
5581
"CPU",
5682
"--model-type",
57-
model_type,
83+
"text-reranking",
84+
"--hf",
5885
])
5986

60-
# test GenAI
61-
run_wwb([
87+
assert GT_FILE.exists()
88+
assert Path(tmp_dir, "reference").exists()
89+
90+
# test Optimum
91+
outpus = run_wwb([
6292
"--target-model",
6393
MODEL_PATH,
6494
"--num-samples",
@@ -68,12 +98,17 @@ def test_reranking_basic(model_id, model_type, tmp_path):
6898
"--device",
6999
"CPU",
70100
"--model-type",
71-
model_type,
72-
"--genai",
101+
"text-reranking",
73102
"--output",
74103
tmp_path,
75104
])
76105

106+
assert (tmp_path / "target").exists()
107+
assert (tmp_path / "target.csv").exists()
108+
assert (tmp_path / "metrics_per_question.csv").exists()
109+
assert (tmp_path / "metrics.csv").exists()
110+
assert "Metrics for model" in outpus
111+
77112
# test w/o models
78113
run_wwb([
79114
"--target-data",
@@ -85,56 +120,6 @@ def test_reranking_basic(model_id, model_type, tmp_path):
85120
"--device",
86121
"CPU",
87122
"--model-type",
88-
model_type,
89-
"--genai",
90-
])
91-
92-
93-
@pytest.mark.parametrize(
94-
("model_id", "model_type"),
95-
[
96-
("Qwen/Qwen3-Reranker-0.6B", "text-reranking"),
97-
],
98-
)
99-
def test_reranking_qwen(model_id, model_type, tmp_path):
100-
GT_FILE = tmp_path / "gt.csv"
101-
MODEL_PATH = tmp_path / model_id.replace("/", "--")
102-
103-
result = subprocess.run(["optimum-cli", "export",
104-
"openvino", "-m", model_id,
105-
MODEL_PATH, "--task",
106-
"text-generation",
107-
"--trust-remote-code"],
108-
capture_output=True,
109-
text=True,
110-
)
111-
assert result.returncode == 0
112-
113-
# Collect reference with HF model
114-
run_wwb([
115-
"--base-model",
116-
model_id,
117-
"--num-samples",
118-
"1",
119-
"--gt-data",
120-
GT_FILE,
121-
"--device",
122-
"CPU",
123-
"--model-type",
124-
model_type,
125-
"--hf",
126-
])
127-
128-
# test Optimum
129-
run_wwb([
130-
"--target-model",
131-
MODEL_PATH,
132-
"--num-samples",
133-
"1",
134-
"--gt-data",
135-
GT_FILE,
136-
"--device",
137-
"CPU",
138-
"--model-type",
139-
model_type,
123+
"text-reranking",
124+
"--genai"
140125
])

tools/who_what_benchmark/tests/test_cli_vlm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ def run_test(model_id, model_type, optimum_threshold, genai_threshold, tmp_path)
1313
if sys.platform == 'darwin':
1414
pytest.xfail("Ticket 173169")
1515
GT_FILE = tmp_path / "gt.csv"
16-
MODEL_PATH = tmp_path / model_id.replace("/", "--")
16+
MODEL_PATH = tmp_path / model_id.replace("/", "_")
1717

1818
result = subprocess.run(["optimum-cli", "export",
1919
"openvino", "-m", model_id,

0 commit comments

Comments
 (0)