Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion tools/who_what_benchmark/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ datasets>=3.6.0
auto-gptq; sys_platform == "linux"
autoawq<0.2.8; sys_platform == "linux"
sentencepiece
jinja2>=3.1.0
jinja2>=3.1.0
scipy
140 changes: 140 additions & 0 deletions tools/who_what_benchmark/tests/test_cli_reranking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
import subprocess # nosec B404
import pytest
import logging
from test_cli_image import run_wwb


logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


@pytest.mark.parametrize(
("model_id", "model_type"),
[
("cross-encoder/ms-marco-TinyBERT-L2-v2", "text-reranking"),
],
)
def test_reranking_basic(model_id, model_type, tmp_path):
GT_FILE = tmp_path / "gt.csv"
MODEL_PATH = tmp_path / model_id.replace("/", "--")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that the WWB and GenAI tests use different replacement strategies: the GenAI tests use .replace("/", "_"), while this test uses .replace("/", "--").

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sbalandi let's align in a separate PR


result = subprocess.run(["optimum-cli", "export",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be synced with OV_CACHE PR: #2781
cc: @akashchi

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The repo contains many versions of the original model: three in the root, as well as various OpenVINO and ONNX variants:

Image

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@apaniukov please give us more details

"openvino", "-m", model_id,
MODEL_PATH, "--task",
"text-classification",
"--trust-remote-code"],
capture_output=True,
text=True,
)
assert result.returncode == 0

# Collect reference with HF model
run_wwb([
"--base-model",
model_id,
"--num-samples",
"1",
"--gt-data",
GT_FILE,
"--device",
"CPU",
"--model-type",
model_type,
"--hf",
])

# test Optimum
run_wwb([
"--target-model",
MODEL_PATH,
"--num-samples",
"1",
"--gt-data",
GT_FILE,
"--device",
"CPU",
"--model-type",
model_type,
])

# test GenAI
run_wwb([
"--target-model",
MODEL_PATH,
"--num-samples",
"1",
"--gt-data",
GT_FILE,
"--device",
"CPU",
"--model-type",
model_type,
"--genai",
"--output",
tmp_path,
])

# test w/o models
run_wwb([
"--target-data",
tmp_path / "target.csv",
"--num-samples",
"1",
"--gt-data",
GT_FILE,
"--device",
"CPU",
"--model-type",
model_type,
"--genai",
])


@pytest.mark.parametrize(
    ("model_id", "model_type"),
    [
        ("Qwen/Qwen3-Reranker-0.6B", "text-reranking"),
    ],
)
def test_reranking_qwen(model_id, model_type, tmp_path):
    """Export a Qwen reranker to OpenVINO and run WWB on the HF and Optimum paths.

    The model is exported with ``optimum-cli`` using the ``text-generation``
    task, ground-truth data is collected with the HF model, and then the
    exported model is evaluated against that ground truth.
    """
    gt_file = tmp_path / "gt.csv"
    model_path = tmp_path / model_id.replace("/", "--")

    export_cmd = [
        "optimum-cli", "export",
        "openvino", "-m", model_id,
        model_path, "--task",
        "text-generation",
        "--trust-remote-code",
    ]
    result = subprocess.run(export_cmd, capture_output=True, text=True)
    # Output is captured, so include stderr in the failure message; otherwise
    # a failed export gives no hint about what went wrong.
    assert result.returncode == 0, f"optimum-cli export failed:\n{result.stderr}"

    # Arguments shared by every WWB invocation below; the resulting argv order
    # is identical to spelling each list out in full.
    common_args = [
        "--num-samples",
        "1",
        "--gt-data",
        gt_file,
        "--device",
        "CPU",
        "--model-type",
        model_type,
    ]

    # Collect reference with HF model
    run_wwb(["--base-model", model_id, *common_args, "--hf"])

    # test Optimum
    run_wwb(["--target-model", model_path, *common_args])
2 changes: 2 additions & 0 deletions tools/who_what_benchmark/whowhatbench/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from .im2im_evaluator import Image2ImageEvaluator
from .inpaint_evaluator import InpaintingEvaluator
from .embeddings_evaluator import EmbeddingsEvaluator
from .reranking_evaluator import RerankingEvaluator


__all__ = [
Expand All @@ -17,5 +18,6 @@
"Image2ImageEvaluator",
"InpaintingEvaluator",
"EmbeddingsEvaluator",
"RerankingEvaluator",
"EVALUATOR_REGISTRY",
]
76 changes: 73 additions & 3 deletions tools/who_what_benchmark/whowhatbench/model_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

from transformers import AutoConfig, AutoModelForCausalLM, AutoModel, AutoModelForVision2Seq, AutoTokenizer

from .embeddings_evaluator import DEFAULT_MAX_LENGTH
from .embeddings_evaluator import DEFAULT_MAX_LENGTH as EMBED_DEFAULT_MAX_LENGTH
from .reranking_evaluator import DEFAULT_MAX_LENGTH as RERANK_DEFAULT_MAX_LENGTH, DEFAULT_TOP_K as RERANK_DEFAULT_TOP_K, reranking_base_on_causallm_arch
from .utils import mock_torch_cuda_is_available, mock_AwqQuantizer_validate_environment


Expand All @@ -21,7 +22,7 @@ def __init__(self, model, model_dir, model_type):
self.model = model
self.model_type = model_type

if model_type in ["text", "visual-text", "text-embedding"]:
if model_type in ["text", "visual-text", "text-embedding", "text-reranking"]:
try:
self.config = AutoConfig.from_pretrained(model_dir)
except Exception:
Expand Down Expand Up @@ -444,7 +445,7 @@ def load_embedding_genai_pipeline(model_dir, device="CPU", ov_config=None, **kwa
config.pooling_type = openvino_genai.TextEmbeddingPipeline.PoolingType.LAST_TOKEN
else:
config.pooling_type = openvino_genai.TextEmbeddingPipeline.PoolingType.CLS
config.max_length = DEFAULT_MAX_LENGTH
config.max_length = EMBED_DEFAULT_MAX_LENGTH
config.normalize = kwargs.get("embeds_normalize", False)
config.pad_to_max_length = True

Expand Down Expand Up @@ -486,6 +487,73 @@ def load_embedding_model(model_id, device="CPU", ov_config=None, use_hf=False, u
return model


def load_reranking_genai_pipeline(model_dir, device="CPU", ov_config=None):
    """Create an OpenVINO GenAI ``TextRerankPipeline`` wrapped in ``GenAIModelWrapper``.

    Args:
        model_dir: Model directory passed to the pipeline and to the wrapper.
        device: Device name; it is upper-cased before being given to the pipeline.
        ov_config: Optional mapping of extra properties forwarded to the
            pipeline as keyword arguments. ``None`` is treated as "no extra
            properties".

    Returns:
        A ``GenAIModelWrapper`` holding the pipeline, tagged ``"text-reranking"``.

    Raises:
        SystemExit: With code ``-1`` when ``openvino_genai`` cannot be imported.
    """
    try:
        import openvino_genai
    except ImportError as e:
        # The exception needs a %s placeholder: passing it as a bare extra
        # argument makes logging fail to format the record and drops the details.
        logger.error("Failed to import openvino_genai package. Please install it. Details:\n%s", e)
        exit(-1)

    logger.info("Using OpenVINO GenAI TextRerankPipeline API")

    config = openvino_genai.TextRerankPipeline.Config()
    config.top_n = RERANK_DEFAULT_TOP_K
    config.max_length = RERANK_DEFAULT_MAX_LENGTH

    # ov_config defaults to None, and ``**None`` raises TypeError, so fall back
    # to an empty mapping.
    extra_properties = ov_config or {}
    pipeline = openvino_genai.TextRerankPipeline(model_dir, device.upper(), config, **extra_properties)

    return GenAIModelWrapper(
        pipeline,
        model_dir,
        "text-reranking"
    )


def load_reranking_model(model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False):
    """Load a reranking model through HF Transformers, OpenVINO GenAI or Optimum.

    The backend is chosen in this order: ``use_hf``, then ``use_genai``,
    otherwise Optimum. For the HF and Optimum backends the model class
    (causal LM vs. sequence classification) is chosen by
    ``reranking_base_on_causallm_arch(config)``.

    Args:
        model_id: Model identifier or path passed to ``from_pretrained`` (or
            used as the model directory for the GenAI pipeline).
        device: Target device for the GenAI and Optimum backends.
        ov_config: OpenVINO configuration forwarded to the GenAI and Optimum
            backends.
        use_hf: Load with HF Transformers.
        use_genai: Load with the OpenVINO GenAI pipeline (ignored if ``use_hf``).

    Returns:
        The loaded model, or a ``GenAIModelWrapper`` for the GenAI backend.
    """
    # Try without remote code first; fall back to trusting remote code only if
    # the plain load fails.
    try:
        config = AutoConfig.from_pretrained(model_id, trust_remote_code=False)
    except Exception:
        config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)

    if use_hf:
        logger.info("Using HF Transformers API")
        if reranking_base_on_causallm_arch(config):
            from transformers import AutoModelForCausalLM
            model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
        else:
            from transformers import AutoModelForSequenceClassification
            model = AutoModelForSequenceClassification.from_pretrained(model_id, trust_remote_code=True)
    elif use_genai:
        logger.info("Using OpenVINO GenAI API")
        model = load_reranking_genai_pipeline(model_id, device, ov_config)
    else:
        logger.info("Using Optimum API")
        if reranking_base_on_causallm_arch(config):
            from optimum.intel.openvino import OVModelForCausalLM
            model_cls = OVModelForCausalLM
        else:
            from optimum.intel.openvino import OVModelForSequenceClassification
            model_cls = OVModelForSequenceClassification

        try:
            model = model_cls.from_pretrained(
                model_id, device=device, ov_config=ov_config, safety_checker=None,
            )
        except ValueError as e:
            # The exception needs a %s placeholder: passing it as a bare extra
            # argument makes logging fail to format the record and drops the
            # details.
            logger.error(
                "Failed to load reranking pipeline, an attempt will be made again with updated parameters. Details:\n%s",
                e,
            )
            # Retry, this time trusting remote code and disabling the cache.
            model = model_cls.from_pretrained(
                model_id,
                trust_remote_code=True,
                use_cache=False,
                device=device,
                ov_config=ov_config,
                safety_checker=None
            )

    return model


def load_model(
model_type, model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False, use_llamacpp=False, **kwargs
):
Expand All @@ -512,5 +580,7 @@ def load_model(
return load_inpainting_model(model_id, device, ov_options, use_hf, use_genai)
elif model_type == "text-embedding":
return load_embedding_model(model_id, device, ov_options, use_hf, use_genai, **kwargs)
elif model_type == "text-reranking":
return load_reranking_model(model_id, device, ov_options, use_hf, use_genai)
else:
raise ValueError(f"Unsupported model type: {model_type}")
Loading
Loading