From 6495cfd12ec8e5b99bc57a79839059b3afe7f97e Mon Sep 17 00:00:00 2001 From: Andrei Kashchikhin Date: Wed, 1 Oct 2025 13:04:29 +0200 Subject: [PATCH 01/19] add WWB cache dir --- tools/who_what_benchmark/tests/constants.py | 7 ++++ .../tests/test_cli_image.py | 8 +++-- .../who_what_benchmark/tests/test_cli_text.py | 33 +++++++++++-------- 3 files changed, 31 insertions(+), 17 deletions(-) create mode 100644 tools/who_what_benchmark/tests/constants.py diff --git a/tools/who_what_benchmark/tests/constants.py b/tools/who_what_benchmark/tests/constants.py new file mode 100644 index 0000000000..a799009e41 --- /dev/null +++ b/tools/who_what_benchmark/tests/constants.py @@ -0,0 +1,7 @@ +from pathlib import Path +import os +import tempfile + + +WWB_CACHE_PATH = Path(os.path.join(os.environ.get('OV_CACHE', tempfile.TemporaryDirectory()), 'wwb_cache')) +SHOULD_CLEANUP = bool(os.environ.get('CLEANUP_CACHE', None)) diff --git a/tools/who_what_benchmark/tests/test_cli_image.py b/tools/who_what_benchmark/tests/test_cli_image.py index b354056daa..a124278989 100644 --- a/tools/who_what_benchmark/tests/test_cli_image.py +++ b/tools/who_what_benchmark/tests/test_cli_image.py @@ -6,12 +6,13 @@ import logging import tempfile import re +from constants import WWB_CACHE_PATH, SHOULD_CLEANUP logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -MODEL_CACHE = tempfile.mkdtemp() +MODEL_CACHE = WWB_CACHE_PATH OV_IMAGE_MODELS = ["echarlaix/tiny-random-stable-diffusion-xl", "yujiepan/stable-diffusion-3-tiny-random", "katuni4ka/tiny-random-flux", @@ -42,8 +43,9 @@ def setup_module(): def teardown_module(): - logger.info("Remove models") - shutil.rmtree(MODEL_CACHE) + if SHOULD_CLEANUP: + logger.info("Removing models") + shutil.rmtree(MODEL_CACHE) def get_similarity(output: str) -> float: diff --git a/tools/who_what_benchmark/tests/test_cli_text.py b/tools/who_what_benchmark/tests/test_cli_text.py index f564d53c09..b79ac4b706 100644 --- a/tools/who_what_benchmark/tests/test_cli_text.py +++ b/tools/who_what_benchmark/tests/test_cli_text.py @@ -7,6 +7,8 @@ import json import sys +from constants import WWB_CACHE_PATH, SHOULD_CLEANUP + from transformers import AutoTokenizer from optimum.intel.openvino import OVModelForCausalLM, OVWeightQuantizationConfig @@ -18,9 +20,9 @@ model_id = "facebook/opt-125m" -tmp_dir = tempfile.mkdtemp() -base_model_path = os.path.join(tmp_dir, "opt125m") -target_model_path = os.path.join(tmp_dir, "opt125m_int8") +cache_dir = WWB_CACHE_PATH +base_model_path = os.path.join(cache_dir, "opt125m") +target_model_path = os.path.join(cache_dir, "opt125m_int8") gptq_model_id = "ybelkada/opt-125m-gptq-4bit" awq_model_id = "TitanML/tiny-mixtral-AWQ-4bit" @@ -30,23 +32,26 @@ def setup_module(): from optimum.exporters.openvino.convert import export_tokenizer logger.info("Create models") - tokenizer = AutoTokenizer.from_pretrained(model_id) - base_model = OVModelForCausalLM.from_pretrained(model_id) - base_model.save_pretrained(base_model_path) - tokenizer.save_pretrained(base_model_path) - export_tokenizer(tokenizer, base_model_path) + tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=WWB_CACHE_PATH) + base_model = OVModelForCausalLM.from_pretrained(model_id, cache_dir=WWB_CACHE_PATH) + if not os.path.exists(base_model_path): + base_model.save_pretrained(base_model_path) + tokenizer.save_pretrained(base_model_path) + export_tokenizer(tokenizer, base_model_path) target_model = OVModelForCausalLM.from_pretrained( - model_id, quantization_config=OVWeightQuantizationConfig(bits=8) + model_id, quantization_config=OVWeightQuantizationConfig(bits=8), cache_dir=WWB_CACHE_PATH ) - target_model.save_pretrained(target_model_path) - tokenizer.save_pretrained(target_model_path) - export_tokenizer(tokenizer, target_model_path) + if not os.path.exists(target_model_path): + target_model.save_pretrained(target_model_path) + tokenizer.save_pretrained(target_model_path) + export_tokenizer(tokenizer, target_model_path) def teardown_module(): - logger.info("Remove models") - shutil.rmtree(tmp_dir) + if SHOULD_CLEANUP: + logger.info("Removing models") + shutil.rmtree(cache_dir) def test_text_target_model(): From 29a668c9c0f7c208e45bd848dee562c10aed0ccc Mon Sep 17 00:00:00 2001 From: Andrei Kashchikhin Date: Wed, 1 Oct 2025 14:21:13 +0200 Subject: [PATCH 02/19] fix lint, use cache for vlm test --- tools/who_what_benchmark/tests/test_cli_text.py | 1 - tools/who_what_benchmark/tests/test_cli_vlm.py | 11 ++++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/who_what_benchmark/tests/test_cli_text.py b/tools/who_what_benchmark/tests/test_cli_text.py index b79ac4b706..4c6aa610ff 100644 --- a/tools/who_what_benchmark/tests/test_cli_text.py +++ b/tools/who_what_benchmark/tests/test_cli_text.py @@ -1,6 +1,5 @@ import os import shutil -import tempfile import pandas as pd import pytest import logging diff --git a/tools/who_what_benchmark/tests/test_cli_vlm.py b/tools/who_what_benchmark/tests/test_cli_vlm.py index c85bc11667..b548078ed3 100644 --- a/tools/who_what_benchmark/tests/test_cli_vlm.py +++ b/tools/who_what_benchmark/tests/test_cli_vlm.py @@ -2,6 +2,7 @@ import pytest import logging from test_cli_image import run_wwb +from constants import WWB_CACHE_PATH logging.basicConfig(level=logging.INFO) @@ -14,9 +15,9 @@ ("katuni4ka/tiny-random-llava", "visual-text"), ], ) -def test_vlm_basic(model_id, model_type, tmp_path): - GT_FILE = tmp_path / "gt.csv" - MODEL_PATH = tmp_path / model_id.replace("/", "--") +def test_vlm_basic(model_id, model_type): + GT_FILE = WWB_CACHE_PATH / "gt.csv" + MODEL_PATH = WWB_CACHE_PATH / model_id.replace("/", "--") result = subprocess.run(["optimum-cli", "export", "openvino", "-m", model_id, @@ -71,13 +72,13 @@ def test_vlm_basic(model_id, model_type, tmp_path): model_type, "--genai", "--output", - tmp_path, + WWB_CACHE_PATH, ]) # test w/o models run_wwb([ "--target-data", - tmp_path / "target.csv", + WWB_CACHE_PATH / "target.csv", "--num-samples", "1", "--gt-data", From 229ef95b5d42e8f17d03168b4983d5f883297dc3 Mon Sep 17 00:00:00 2001 From: Andrei Kashchikhin Date: Wed, 1 Oct 2025 15:25:08 +0200 Subject: [PATCH 03/19] use caches in other tests --- tools/who_what_benchmark/tests/test_cli_image.py | 8 +++++--- tools/who_what_benchmark/tests/test_cli_text.py | 6 ++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/tools/who_what_benchmark/tests/test_cli_image.py b/tools/who_what_benchmark/tests/test_cli_image.py index a124278989..71ee9cff4d 100644 --- a/tools/who_what_benchmark/tests/test_cli_image.py +++ b/tools/who_what_benchmark/tests/test_cli_image.py @@ -69,11 +69,12 @@ def get_similarity(output: str) -> float: ], ) def test_image_model_types(model_id, model_type, backend, tmp_path): + MODEL_PATH = os.path.join(MODEL_CACHE, model_id.replace("/", "--")) wwb_args = [ "--base-model", - model_id, + MODEL_PATH, "--target-model", - model_id, + MODEL_PATH, "--num-samples", "1", "--gt-data", @@ -197,9 +198,10 @@ def test_image_model_genai(model_id, model_type, tmp_path): ) def test_image_custom_dataset(model_id, model_type, backend, tmp_path): GT_FILE = tmp_path / "test_sd.csv" + MODEL_PATH = os.path.join(MODEL_CACHE, model_id.replace("/", "--")) wwb_args = [ "--base-model", - model_id, + MODEL_PATH, "--num-samples", "1", "--gt-data", diff --git a/tools/who_what_benchmark/tests/test_cli_text.py b/tools/who_what_benchmark/tests/test_cli_text.py index 4c6aa610ff..071301a66c 100644 --- a/tools/who_what_benchmark/tests/test_cli_text.py +++ b/tools/who_what_benchmark/tests/test_cli_text.py @@ -142,9 +142,10 @@ def test_text_verbose(): def test_text_language(tmp_path): temp_file_name = tmp_path / "gt.csv" + MODEL_PATH = os.path.join(WWB_CACHE_PATH, 'Qwen/Qwen2-0.5B') run_wwb([ "--base-model", - "Qwen/Qwen2-0.5B", + MODEL_PATH, "--gt-data", temp_file_name, "--num-samples", @@ -174,9 +175,10 @@ def test_text_language(tmp_path): ) def test_text_hf_model(model_id, tmp_path): temp_file_name = tmp_path / "gt.csv" + MODEL_PATH = os.path.join(WWB_CACHE_PATH, model_id.replace("/", "--")) run_wwb([ "--base-model", - model_id, + MODEL_PATH, "--gt-data", temp_file_name, "--num-samples", From dd2140fca29037d00f9d42c9a06ded1efa53d382 Mon Sep 17 00:00:00 2001 From: Andrei Kashchikhin Date: Wed, 1 Oct 2025 17:23:04 +0200 Subject: [PATCH 04/19] create if not existing --- .../tests/test_cli_image.py | 15 ++++++++------ .../who_what_benchmark/tests/test_cli_text.py | 20 ++++++++++--------- .../who_what_benchmark/tests/test_cli_vlm.py | 5 +++-- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/tools/who_what_benchmark/tests/test_cli_image.py b/tools/who_what_benchmark/tests/test_cli_image.py index 71ee9cff4d..19660b6cdc 100644 --- a/tools/who_what_benchmark/tests/test_cli_image.py +++ b/tools/who_what_benchmark/tests/test_cli_image.py @@ -2,9 +2,9 @@ import subprocess # nosec B404 import os import shutil +from pathlib import Path import pytest import logging -import tempfile import re from constants import WWB_CACHE_PATH, SHOULD_CLEANUP @@ -38,7 +38,7 @@ def run_wwb(args): def setup_module(): for model_id in OV_IMAGE_MODELS: - MODEL_PATH = os.path.join(MODEL_CACHE, model_id.replace("/", "--")) + MODEL_PATH = MODEL_CACHE.joinpath(model_id.replace("/", "--")) subprocess.run(["optimum-cli", "export", "openvino", "--model", model_id, MODEL_PATH], capture_output=True, text=True) @@ -69,7 +69,8 @@ def get_similarity(output: str) -> float: ], ) def test_image_model_types(model_id, model_type, backend, tmp_path): - MODEL_PATH = os.path.join(MODEL_CACHE, model_id.replace("/", "--")) + MODEL_PATH = MODEL_CACHE.joinpath(model_id.replace("/", "--")) + MODEL_PATH = MODEL_PATH if MODEL_PATH.exists() else model_id wwb_args = [ "--base-model", MODEL_PATH, @@ -113,11 +114,12 @@ def test_image_model_genai(model_id, model_type, tmp_path): pytest.xfail("Segfault. Ticket 170877") GT_FILE = tmp_path / "gt.csv" - MODEL_PATH = os.path.join(MODEL_CACHE, model_id.replace("/", "--")) + MODEL_PATH = MODEL_CACHE.joinpath(model_id.replace("/", "--")) + MODEL_PATH = MODEL_PATH if MODEL_PATH.exists() else model_id run_wwb([ "--base-model", - model_id, + MODEL_PATH, "--num-samples", "1", "--gt-data", @@ -198,7 +200,8 @@ def test_image_model_genai(model_id, model_type, tmp_path): ) def test_image_custom_dataset(model_id, model_type, backend, tmp_path): GT_FILE = tmp_path / "test_sd.csv" - MODEL_PATH = os.path.join(MODEL_CACHE, model_id.replace("/", "--")) + MODEL_PATH = MODEL_CACHE.joinpath(model_id.replace("/", "--")) + MODEL_PATH = MODEL_PATH if MODEL_PATH.exists() else model_id wwb_args = [ "--base-model", MODEL_PATH, diff --git a/tools/who_what_benchmark/tests/test_cli_text.py b/tools/who_what_benchmark/tests/test_cli_text.py index 071301a66c..7cdbd50b4a 100644 --- a/tools/who_what_benchmark/tests/test_cli_text.py +++ b/tools/who_what_benchmark/tests/test_cli_text.py @@ -29,19 +29,19 @@ def setup_module(): from optimum.exporters.openvino.convert import export_tokenizer - - logger.info("Create models") - tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=WWB_CACHE_PATH) - base_model = OVModelForCausalLM.from_pretrained(model_id, cache_dir=WWB_CACHE_PATH) + if not os.path.exists(base_model_path): + logger.info("Create models") + tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=WWB_CACHE_PATH) + base_model = OVModelForCausalLM.from_pretrained(model_id, cache_dir=WWB_CACHE_PATH) base_model.save_pretrained(base_model_path) tokenizer.save_pretrained(base_model_path) export_tokenizer(tokenizer, base_model_path) - target_model = OVModelForCausalLM.from_pretrained( - model_id, quantization_config=OVWeightQuantizationConfig(bits=8), cache_dir=WWB_CACHE_PATH - ) if not os.path.exists(target_model_path): + target_model = OVModelForCausalLM.from_pretrained( + model_id, quantization_config=OVWeightQuantizationConfig(bits=8), cache_dir=WWB_CACHE_PATH + ) target_model.save_pretrained(target_model_path) tokenizer.save_pretrained(target_model_path) export_tokenizer(tokenizer, target_model_path) @@ -142,7 +142,8 @@ def test_text_verbose(): def test_text_language(tmp_path): temp_file_name = tmp_path / "gt.csv" - MODEL_PATH = os.path.join(WWB_CACHE_PATH, 'Qwen/Qwen2-0.5B') + MODEL_PATH = WWB_CACHE_PATH.joinpath('Qwen/Qwen2-0.5B'.replace("/", "--")) + MODEL_PATH = MODEL_PATH if MODEL_PATH.exists() else 'Qwen/Qwen2-0.5B' run_wwb([ "--base-model", MODEL_PATH, @@ -175,7 +176,8 @@ def test_text_language(tmp_path): ) def test_text_hf_model(model_id, tmp_path): temp_file_name = tmp_path / "gt.csv" - MODEL_PATH = os.path.join(WWB_CACHE_PATH, model_id.replace("/", "--")) + MODEL_PATH = WWB_CACHE_PATH.joinpath(model_id.replace("/", "--")) + MODEL_PATH = MODEL_PATH if MODEL_PATH.exists() else model_id run_wwb([ "--base-model", MODEL_PATH, diff --git a/tools/who_what_benchmark/tests/test_cli_vlm.py b/tools/who_what_benchmark/tests/test_cli_vlm.py index b548078ed3..db21c0cce2 100644 --- a/tools/who_what_benchmark/tests/test_cli_vlm.py +++ b/tools/who_what_benchmark/tests/test_cli_vlm.py @@ -17,7 +17,8 @@ ) def test_vlm_basic(model_id, model_type): GT_FILE = WWB_CACHE_PATH / "gt.csv" - MODEL_PATH = WWB_CACHE_PATH / model_id.replace("/", "--") + MODEL_PATH = WWB_CACHE_PATH.joinpath(model_id.replace("/", "--")) + MODEL_PATH = MODEL_PATH if MODEL_PATH.exists() else model_id result = subprocess.run(["optimum-cli", "export", "openvino", "-m", model_id, @@ -32,7 +33,7 @@ def test_vlm_basic(model_id, model_type): # Collect reference with HF model run_wwb([ "--base-model", - model_id, + MODEL_PATH, "--num-samples", "1", "--gt-data", From 2bc0ada59148e29bbeeeae693512443eb80344f4 Mon Sep 17 00:00:00 2001 From: Andrei Kashchikhin Date: Thu, 2 Oct 2025 10:39:56 +0200 Subject: [PATCH 05/19] lint --- tools/who_what_benchmark/tests/constants.py | 4 ++-- tools/who_what_benchmark/tests/test_cli_image.py | 1 - tools/who_what_benchmark/tests/test_cli_text.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/who_what_benchmark/tests/constants.py b/tools/who_what_benchmark/tests/constants.py index a799009e41..4a085ddcc0 100644 --- a/tools/who_what_benchmark/tests/constants.py +++ b/tools/who_what_benchmark/tests/constants.py @@ -3,5 +3,5 @@ import tempfile -WWB_CACHE_PATH = Path(os.path.join(os.environ.get('OV_CACHE', tempfile.TemporaryDirectory()), 'wwb_cache')) -SHOULD_CLEANUP = bool(os.environ.get('CLEANUP_CACHE', None)) +WWB_CACHE_PATH = Path(os.path.join(os.environ.get('OV_CACHE', tempfile.TemporaryDirectory().name), 'wwb_cache')) +SHOULD_CLEANUP = os.environ.get('CLEANUP_CACHE', '').lower() in ('1', 'true', 'yes') diff --git a/tools/who_what_benchmark/tests/test_cli_image.py b/tools/who_what_benchmark/tests/test_cli_image.py index 19660b6cdc..ea5a2ef7c3 100644 --- a/tools/who_what_benchmark/tests/test_cli_image.py +++ b/tools/who_what_benchmark/tests/test_cli_image.py @@ -2,7 +2,6 @@ import subprocess # nosec B404 import os import shutil -from pathlib import Path import pytest import logging import re diff --git a/tools/who_what_benchmark/tests/test_cli_text.py b/tools/who_what_benchmark/tests/test_cli_text.py index 7cdbd50b4a..0f90f34d71 100644 --- a/tools/who_what_benchmark/tests/test_cli_text.py +++ b/tools/who_what_benchmark/tests/test_cli_text.py @@ -29,7 +29,7 @@ def setup_module(): from optimum.exporters.openvino.convert import export_tokenizer - + if not os.path.exists(base_model_path): logger.info("Create models") tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=WWB_CACHE_PATH) From 72ab81a381376ec89adb5e1305f91615d68c44ac Mon Sep 17 00:00:00 2001 From: Andrei Kashchikhin Date: Thu, 2 Oct 2025 11:55:59 +0200 Subject: [PATCH 06/19] use cache for datasets --- tests/python_tests/test_kv_cache_eviction.py | 6 ++++-- tests/python_tests/test_whisper_pipeline.py | 7 ++++++- tools/who_what_benchmark/examples/openvino_batched_eval.py | 2 +- tools/who_what_benchmark/whowhatbench/im2im_evaluator.py | 3 ++- tools/who_what_benchmark/whowhatbench/inpaint_evaluator.py | 3 ++- tools/who_what_benchmark/whowhatbench/wwb.py | 3 ++- 6 files changed, 17 insertions(+), 7 deletions(-) diff --git a/tests/python_tests/test_kv_cache_eviction.py b/tests/python_tests/test_kv_cache_eviction.py index 260adae397..c26255eb09 100644 --- a/tests/python_tests/test_kv_cache_eviction.py +++ b/tests/python_tests/test_kv_cache_eviction.py @@ -1,7 +1,9 @@ # Copyright (C) 2023-2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import os import sys +import tempfile import datasets import pytest from dataclasses import dataclass @@ -236,7 +238,7 @@ def test_optimized_generation_longbench(test_struct): generation_config.num_return_sequences = 1 generation_config.max_new_tokens = max_new_tokens - data = datasets.load_dataset('THUDM/LongBench', subset, split='test[:32]', trust_remote_code=True) + data = datasets.load_dataset('THUDM/LongBench', subset, split='test[:32]', trust_remote_code=True, cache_dir=os.path.join(os.environ.get('OV_CACHE', tempfile.TemporaryDirectory().name), 'datasets')) with tqdm(total=len(data)) as progress_bar: batch = [] answers = [] @@ -324,7 +326,7 @@ def test_kvcrush_vs_snapkv_baseline(subset): generation_config.max_new_tokens = max_new_tokens generation_config.apply_chat_template = False - data = datasets.load_dataset('THUDM/LongBench', subset, split='test[:32]') + data = datasets.load_dataset('THUDM/LongBench', subset, split='test[:32]', cache_dir=os.path.join(os.environ.get('OV_CACHE', tempfile.TemporaryDirectory().name), 'datasets')) with tqdm(total=len(data)) as progress_bar: batch = [] baseline_answers = [] diff --git a/tests/python_tests/test_whisper_pipeline.py b/tests/python_tests/test_whisper_pipeline.py index 1e5302814d..b62b825849 100644 --- a/tests/python_tests/test_whisper_pipeline.py +++ b/tests/python_tests/test_whisper_pipeline.py @@ -1,6 +1,8 @@ # Copyright (C) 2023-2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import os +import tempfile import openvino_genai as ov_genai import functools import pytest @@ -189,6 +191,7 @@ def get_whisper_dataset(language: str, long_form: bool) -> list: split="test", streaming=True, trust_remote_code=True, + cache_dir=os.path.join(os.environ.get('OV_CACHE', tempfile.TemporaryDirectory().name), 'datasets') ) else: ds = datasets.load_dataset( @@ -196,6 +199,7 @@ def get_whisper_dataset(language: str, long_form: bool) -> list: split="test", streaming=True, trust_remote_code=True, + cache_dir=os.path.join(os.environ.get('OV_CACHE', tempfile.TemporaryDirectory().name), 'datasets') ) ds = typing.cast(datasets.IterableDataset, ds) ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16000)) @@ -537,7 +541,8 @@ def test_longform_audio_with_past(model_descr, sample_from_dataset): def test_shortform(model_descr): samples = [] ds = datasets.load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation" + "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", + cache_dir=os.path.join(os.environ.get('OV_CACHE', tempfile.TemporaryDirectory().name), 'datasets') ) for ds_row in ds: diff --git a/tools/who_what_benchmark/examples/openvino_batched_eval.py b/tools/who_what_benchmark/examples/openvino_batched_eval.py index fdbaa9acdb..2c3a10b2fa 100644 --- a/tools/who_what_benchmark/examples/openvino_batched_eval.py +++ b/tools/who_what_benchmark/examples/openvino_batched_eval.py @@ -55,7 +55,7 @@ generation_config.num_return_sequences = 1 generation_config.max_new_tokens = MAX_NEW_TOKENS -data = load_dataset(path="squad", name=None, split="validation")["context"] +data = load_dataset(path="squad", name=None, split="validation", cache_dir=os.path.join(os.environ.get('OV_CACHE', tempfile.TemporaryDirectory().name), 'datasets'))["context"] data_dict = {"prompts": list(dict({k: None for k in data}).keys())[:MAX_SEQUENCES]} model_cb_noopt = ContinuousBatchingPipeline( diff --git a/tools/who_what_benchmark/whowhatbench/im2im_evaluator.py b/tools/who_what_benchmark/whowhatbench/im2im_evaluator.py index 90eb6c7c87..ba26b276c6 100644 --- a/tools/who_what_benchmark/whowhatbench/im2im_evaluator.py +++ b/tools/who_what_benchmark/whowhatbench/im2im_evaluator.py @@ -1,4 +1,5 @@ import os +import tempfile from typing import Any, Union import datasets @@ -26,7 +27,7 @@ def prepare_default_data(num_samples=None): NUM_SAMPLES = 10 if num_samples is None else num_samples set_seed(42) default_dataset = datasets.load_dataset( - DATASET_NAME, split="test", streaming=True + DATASET_NAME, split="test", streaming=True, cache_dir=os.path.join(os.environ.get('OV_CACHE', tempfile.TemporaryDirectory().name), 'datasets') ).filter(lambda example: example["Instruction_VLM-LLM"] != "").take(NUM_SAMPLES) return default_dataset.map( lambda x: preprocess_fn(x), remove_columns=default_dataset.column_names diff --git a/tools/who_what_benchmark/whowhatbench/inpaint_evaluator.py b/tools/who_what_benchmark/whowhatbench/inpaint_evaluator.py index c3fe0825f7..218c0202ac 100644 --- a/tools/who_what_benchmark/whowhatbench/inpaint_evaluator.py +++ b/tools/who_what_benchmark/whowhatbench/inpaint_evaluator.py @@ -1,4 +1,5 @@ import os +import tempfile from typing import Any, Union import datasets @@ -27,7 +28,7 @@ def prepare_default_data(num_samples=None): NUM_SAMPLES = 10 if num_samples is None else num_samples set_seed(42) default_dataset = datasets.load_dataset( - DATASET_NAME, split="test", streaming=True + DATASET_NAME, split="test", streaming=True, cache_dir=os.path.join(os.environ.get('OV_CACHE', tempfile.TemporaryDirectory().name), 'datasets') ).filter(lambda example: example["inpaint_caption"] != "").take(NUM_SAMPLES) return default_dataset.map( lambda x: preprocess_fn(x), remove_columns=default_dataset.column_names diff --git a/tools/who_what_benchmark/whowhatbench/wwb.py b/tools/who_what_benchmark/whowhatbench/wwb.py index e42c623a36..1227498d88 100644 --- a/tools/who_what_benchmark/whowhatbench/wwb.py +++ b/tools/who_what_benchmark/whowhatbench/wwb.py @@ -1,5 +1,6 @@ import argparse import difflib +import tempfile import numpy as np import logging import os @@ -234,7 +235,7 @@ def load_prompts(args): else: path = args.dataset name = None - data = load_dataset(path=path, name=name, split=split) + data = load_dataset(path=path, name=name, split=split, cache_dir=os.path.join(os.environ.get('OV_CACHE', tempfile.TemporaryDirectory().name), 'datasets')) res = data[args.dataset_field] res = {"prompts": list(res)} From a6028eeee9a4f16e707ba619e051e099591fabbb Mon Sep 17 00:00:00 2001 From: Andrei Kashchikhin Date: Thu, 2 Oct 2025 13:13:07 +0200 Subject: [PATCH 07/19] Revert "use cache for datasets" This reverts commit 72ab81a381376ec89adb5e1305f91615d68c44ac. --- tests/python_tests/test_kv_cache_eviction.py | 6 ++---- tests/python_tests/test_whisper_pipeline.py | 7 +------ tools/who_what_benchmark/examples/openvino_batched_eval.py | 2 +- tools/who_what_benchmark/whowhatbench/im2im_evaluator.py | 3 +-- tools/who_what_benchmark/whowhatbench/inpaint_evaluator.py | 3 +-- tools/who_what_benchmark/whowhatbench/wwb.py | 3 +-- 6 files changed, 7 insertions(+), 17 deletions(-) diff --git a/tests/python_tests/test_kv_cache_eviction.py b/tests/python_tests/test_kv_cache_eviction.py index c26255eb09..260adae397 100644 --- a/tests/python_tests/test_kv_cache_eviction.py +++ b/tests/python_tests/test_kv_cache_eviction.py @@ -1,9 +1,7 @@ # Copyright (C) 2023-2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import os import sys -import tempfile import datasets import pytest from dataclasses import dataclass @@ -238,7 +236,7 @@ def test_optimized_generation_longbench(test_struct): generation_config.num_return_sequences = 1 generation_config.max_new_tokens = max_new_tokens - data = datasets.load_dataset('THUDM/LongBench', subset, split='test[:32]', trust_remote_code=True, cache_dir=os.path.join(os.environ.get('OV_CACHE', tempfile.TemporaryDirectory().name), 'datasets')) + data = datasets.load_dataset('THUDM/LongBench', subset, split='test[:32]', trust_remote_code=True) with tqdm(total=len(data)) as progress_bar: batch = [] answers = [] @@ -326,7 +324,7 @@ def test_kvcrush_vs_snapkv_baseline(subset): generation_config.max_new_tokens = max_new_tokens generation_config.apply_chat_template = False - data = datasets.load_dataset('THUDM/LongBench', subset, split='test[:32]', cache_dir=os.path.join(os.environ.get('OV_CACHE', tempfile.TemporaryDirectory().name), 'datasets')) + data = datasets.load_dataset('THUDM/LongBench', subset, split='test[:32]') with tqdm(total=len(data)) as progress_bar: batch = [] baseline_answers = [] diff --git a/tests/python_tests/test_whisper_pipeline.py b/tests/python_tests/test_whisper_pipeline.py index b62b825849..1e5302814d 100644 --- a/tests/python_tests/test_whisper_pipeline.py +++ b/tests/python_tests/test_whisper_pipeline.py @@ -1,8 +1,6 @@ # Copyright (C) 2023-2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import os -import tempfile import openvino_genai as ov_genai import functools import pytest @@ -191,7 +189,6 @@ def get_whisper_dataset(language: str, long_form: bool) -> list: split="test", streaming=True, trust_remote_code=True, - cache_dir=os.path.join(os.environ.get('OV_CACHE', tempfile.TemporaryDirectory().name), 'datasets') ) else: ds = datasets.load_dataset( @@ -199,7 +196,6 @@ def get_whisper_dataset(language: str, long_form: bool) -> list: split="test", streaming=True, trust_remote_code=True, - cache_dir=os.path.join(os.environ.get('OV_CACHE', tempfile.TemporaryDirectory().name), 'datasets') ) ds = typing.cast(datasets.IterableDataset, ds) ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16000)) @@ -541,8 +537,7 @@ def test_longform_audio_with_past(model_descr, sample_from_dataset): def test_shortform(model_descr): samples = [] ds = datasets.load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", - cache_dir=os.path.join(os.environ.get('OV_CACHE', tempfile.TemporaryDirectory().name), 'datasets') + "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation" ) for ds_row in ds: diff --git a/tools/who_what_benchmark/examples/openvino_batched_eval.py b/tools/who_what_benchmark/examples/openvino_batched_eval.py index 2c3a10b2fa..fdbaa9acdb 100644 --- a/tools/who_what_benchmark/examples/openvino_batched_eval.py +++ b/tools/who_what_benchmark/examples/openvino_batched_eval.py @@ -55,7 +55,7 @@ generation_config.num_return_sequences = 1 generation_config.max_new_tokens = MAX_NEW_TOKENS -data = load_dataset(path="squad", name=None, split="validation", cache_dir=os.path.join(os.environ.get('OV_CACHE', tempfile.TemporaryDirectory().name), 'datasets'))["context"] +data = load_dataset(path="squad", name=None, split="validation")["context"] data_dict = {"prompts": list(dict({k: None for k in data}).keys())[:MAX_SEQUENCES]} model_cb_noopt = ContinuousBatchingPipeline( diff --git a/tools/who_what_benchmark/whowhatbench/im2im_evaluator.py b/tools/who_what_benchmark/whowhatbench/im2im_evaluator.py index ba26b276c6..90eb6c7c87 100644 --- a/tools/who_what_benchmark/whowhatbench/im2im_evaluator.py +++ b/tools/who_what_benchmark/whowhatbench/im2im_evaluator.py @@ -1,5 +1,4 @@ import os -import tempfile from typing import Any, Union import datasets @@ -27,7 +26,7 @@ def prepare_default_data(num_samples=None): NUM_SAMPLES = 10 if num_samples is None else num_samples set_seed(42) default_dataset = datasets.load_dataset( - DATASET_NAME, split="test", streaming=True, cache_dir=os.path.join(os.environ.get('OV_CACHE', tempfile.TemporaryDirectory().name), 'datasets') + DATASET_NAME, split="test", streaming=True ).filter(lambda example: example["Instruction_VLM-LLM"] != "").take(NUM_SAMPLES) return default_dataset.map( lambda x: preprocess_fn(x), remove_columns=default_dataset.column_names diff --git a/tools/who_what_benchmark/whowhatbench/inpaint_evaluator.py b/tools/who_what_benchmark/whowhatbench/inpaint_evaluator.py index 218c0202ac..c3fe0825f7 100644 --- a/tools/who_what_benchmark/whowhatbench/inpaint_evaluator.py +++ b/tools/who_what_benchmark/whowhatbench/inpaint_evaluator.py @@ -1,5 +1,4 @@ import os -import tempfile from typing import Any, Union import datasets @@ -28,7 +27,7 @@ def prepare_default_data(num_samples=None): NUM_SAMPLES = 10 if num_samples is None else num_samples set_seed(42) default_dataset = datasets.load_dataset( - DATASET_NAME, split="test", streaming=True, cache_dir=os.path.join(os.environ.get('OV_CACHE', tempfile.TemporaryDirectory().name), 'datasets') + DATASET_NAME, split="test", streaming=True ).filter(lambda example: example["inpaint_caption"] != "").take(NUM_SAMPLES) return default_dataset.map( lambda x: preprocess_fn(x), remove_columns=default_dataset.column_names diff --git a/tools/who_what_benchmark/whowhatbench/wwb.py b/tools/who_what_benchmark/whowhatbench/wwb.py index 1227498d88..e42c623a36 100644 --- a/tools/who_what_benchmark/whowhatbench/wwb.py +++ b/tools/who_what_benchmark/whowhatbench/wwb.py @@ -1,6 +1,5 @@ import argparse import difflib -import tempfile import numpy as np import logging import os @@ -235,7 +234,7 @@ def load_prompts(args): else: path = args.dataset name = None - data = load_dataset(path=path, name=name, split=split, cache_dir=os.path.join(os.environ.get('OV_CACHE', tempfile.TemporaryDirectory().name), 'datasets')) + data = load_dataset(path=path, name=name, split=split) res = data[args.dataset_field] res = {"prompts": list(res)} From ffbafd1733cb05fb4327a939e7a2d5a5908c418a Mon Sep 17 00:00:00 2001 From: Andrei Kashchikhin Date: Thu, 2 Oct 2025 14:28:31 +0200 Subject: [PATCH 08/19] verbosity --- .github/workflows/linux.yml | 2 ++ .github/workflows/windows.yml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 0823389515..86b81dde6e 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -26,6 +26,8 @@ env: SCCACHE_AZURE_KEY_PREFIX: genai/ubuntu/22_04/x64 HF_HOME: /mount/caches/huggingface/lin OV_CACHE: /mount/caches/huggingface/.ov_cache/lin + HF_HUB_VERBOSITY: debug + TRANSFORMERS_VERBOSITY: debug OPENVINO_LOG_LEVEL: 5 GENAI_ARCHIVE_NAME: genai.tar.gz GENAI_SAMPLES_NAME: genai_samples.tar.gz diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index a344aac352..01d0948588 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -24,6 +24,8 @@ env: CCACHE_MAXSIZE: 500Mi HF_HOME: C:/mount/caches/huggingface/win OV_CACHE: C:/mount/caches/huggingface/.ov_cache/win/ + HF_HUB_VERBOSITY: debug + TRANSFORMERS_VERBOSITY: debug OPENVINO_LOG_LEVEL: 5 ARTIFACTS_SHARE: '/mount/build-artifacts' BASE_PRODUCT_TYPE: public_windows_vs2022 From 57916ca05497efcc6f2a644ae53d1adcc3e9dcdd Mon Sep 17 00:00:00 2001 From: Andrei Kashchikhin Date: Thu, 2 Oct 2025 15:40:51 +0200 Subject: [PATCH 09/19] use cache for ov models only --- .../tests/test_cli_image.py | 10 +++------- .../who_what_benchmark/tests/test_cli_text.py | 8 ++------ .../who_what_benchmark/tests/test_cli_vlm.py | 19 ++++++++++--------- 3 files changed, 15 insertions(+), 22 deletions(-) diff --git a/tools/who_what_benchmark/tests/test_cli_image.py b/tools/who_what_benchmark/tests/test_cli_image.py index ea5a2ef7c3..8ee2bd5575 100644 --- a/tools/who_what_benchmark/tests/test_cli_image.py +++ b/tools/who_what_benchmark/tests/test_cli_image.py @@ -68,13 +68,11 @@ def get_similarity(output: str) -> float: ], ) def test_image_model_types(model_id, model_type, backend, tmp_path): - MODEL_PATH = MODEL_CACHE.joinpath(model_id.replace("/", "--")) - MODEL_PATH = MODEL_PATH if MODEL_PATH.exists() else model_id wwb_args = [ "--base-model", - MODEL_PATH, + model_id, "--target-model", - MODEL_PATH, + model_id, "--num-samples", "1", "--gt-data", @@ -199,11 +197,9 @@ def test_image_model_genai(model_id, model_type, tmp_path): ) def test_image_custom_dataset(model_id, model_type, backend, tmp_path): GT_FILE = tmp_path / "test_sd.csv" - MODEL_PATH = MODEL_CACHE.joinpath(model_id.replace("/", "--")) - MODEL_PATH = MODEL_PATH if MODEL_PATH.exists() else model_id wwb_args = [ "--base-model", - MODEL_PATH, + model_id, "--num-samples", "1", "--gt-data", diff --git a/tools/who_what_benchmark/tests/test_cli_text.py b/tools/who_what_benchmark/tests/test_cli_text.py index 0f90f34d71..d07f4a68fa 100644 --- a/tools/who_what_benchmark/tests/test_cli_text.py +++ b/tools/who_what_benchmark/tests/test_cli_text.py @@ -142,11 +142,9 @@ def test_text_verbose(): def test_text_language(tmp_path): temp_file_name = tmp_path / "gt.csv" - MODEL_PATH = WWB_CACHE_PATH.joinpath('Qwen/Qwen2-0.5B'.replace("/", "--")) - MODEL_PATH = MODEL_PATH if MODEL_PATH.exists() else 'Qwen/Qwen2-0.5B' run_wwb([ "--base-model", - MODEL_PATH, + 'Qwen/Qwen2-0.5B', "--gt-data", temp_file_name, "--num-samples", @@ -176,11 +174,9 @@ def test_text_language(tmp_path): ) def test_text_hf_model(model_id, tmp_path): temp_file_name = tmp_path / "gt.csv" - MODEL_PATH = WWB_CACHE_PATH.joinpath(model_id.replace("/", "--")) - MODEL_PATH = MODEL_PATH if MODEL_PATH.exists() else model_id run_wwb([ "--base-model", - MODEL_PATH, + model_id, "--gt-data", temp_file_name, "--num-samples", diff --git a/tools/who_what_benchmark/tests/test_cli_vlm.py b/tools/who_what_benchmark/tests/test_cli_vlm.py index db21c0cce2..7640563e3a 100644 --- a/tools/who_what_benchmark/tests/test_cli_vlm.py +++ b/tools/who_what_benchmark/tests/test_cli_vlm.py @@ -20,15 +20,16 @@ def test_vlm_basic(model_id, model_type): MODEL_PATH = WWB_CACHE_PATH.joinpath(model_id.replace("/", "--")) MODEL_PATH = MODEL_PATH if MODEL_PATH.exists() else model_id - result = subprocess.run(["optimum-cli", "export", - "openvino", "-m", model_id, - MODEL_PATH, "--task", - "image-text-to-text", - "--trust-remote-code"], - capture_output=True, - text=True, - ) - assert result.returncode == 0 + if not MODEL_PATH.exists(): + result = subprocess.run(["optimum-cli", "export", + "openvino", "-m", model_id, + MODEL_PATH, "--task", + "image-text-to-text", + "--trust-remote-code"], + capture_output=True, + text=True, + ) + assert result.returncode == 0 # Collect reference with HF model run_wwb([ From 01a91ed1f49e704735c51e05d28e52d1680ce9f0 Mon Sep 17 00:00:00 2001 From: Andrei Kashchikhin Date: Thu, 2 Oct 2025 15:58:01 +0200 Subject: [PATCH 10/19] lint --- tools/who_what_benchmark/tests/test_cli_vlm.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/who_what_benchmark/tests/test_cli_vlm.py b/tools/who_what_benchmark/tests/test_cli_vlm.py index 7640563e3a..9365d5a406 100644 --- a/tools/who_what_benchmark/tests/test_cli_vlm.py +++ b/tools/who_what_benchmark/tests/test_cli_vlm.py @@ -22,10 +22,10 @@ def test_vlm_basic(model_id, model_type): if not MODEL_PATH.exists(): result = subprocess.run(["optimum-cli", "export", - "openvino", "-m", model_id, - MODEL_PATH, "--task", - "image-text-to-text", - "--trust-remote-code"], + "openvino", "-m", model_id, + MODEL_PATH, "--task", + "image-text-to-text", + "--trust-remote-code"], capture_output=True, text=True, ) From 2dcc9ba623b7d74c60ba248c88174dfc398a2141 Mon Sep 17 00:00:00 2001 From: Andrei Kashchikhin Date: Mon, 6 Oct 2025 14:10:08 +0200 Subject: [PATCH 11/19] rm unused --- tools/who_what_benchmark/tests/test_cli_vlm.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/who_what_benchmark/tests/test_cli_vlm.py b/tools/who_what_benchmark/tests/test_cli_vlm.py index ae000477bf..6625904201 100644 --- a/tools/who_what_benchmark/tests/test_cli_vlm.py +++ b/tools/who_what_benchmark/tests/test_cli_vlm.py @@ -21,7 +21,6 @@ def test_vlm_basic(model_id, model_type): pytest.xfail("Ticket 173169") GT_FILE = WWB_CACHE_PATH / "gt.csv" MODEL_PATH = WWB_CACHE_PATH.joinpath(model_id.replace("/", "--")) - MODEL_PATH = MODEL_PATH if MODEL_PATH.exists() else model_id if not MODEL_PATH.exists(): result = subprocess.run(["optimum-cli", "export", From 19e5943b9c095b8d3c1f3e69358b6d9b252474e0 Mon Sep 17 00:00:00 2001 From: Andrei Kashchikhin Date: Tue, 7 Oct 2025 10:47:33 +0200 Subject: [PATCH 12/19] quotes, reuse cache dir setting logic --- tools/who_what_benchmark/tests/constants.py | 23 +++++++++++++++++-- .../who_what_benchmark/tests/test_cli_text.py | 2 +- .../who_what_benchmark/tests/test_cli_vlm.py | 4 ++-- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/tools/who_what_benchmark/tests/constants.py b/tools/who_what_benchmark/tests/constants.py index 4a085ddcc0..4da6c98f96 100644 --- a/tools/who_what_benchmark/tests/constants.py +++ b/tools/who_what_benchmark/tests/constants.py @@ -1,7 +1,26 @@ from pathlib import Path import os import tempfile +from datetime import datetime +from importlib import metadata -WWB_CACHE_PATH = Path(os.path.join(os.environ.get('OV_CACHE', tempfile.TemporaryDirectory().name), 'wwb_cache')) -SHOULD_CLEANUP = os.environ.get('CLEANUP_CACHE', '').lower() in ('1', 'true', 'yes') +SHOULD_CLEANUP = os.environ.get("CLEANUP_CACHE", "").lower() in ("1", "true", "yes") + + +def get_wwb_cache_dir(temp_dir=tempfile.TemporaryDirectory()) -> Path: + if "OV_CACHE" in os.environ: + date_subfolder = datetime.now().strftime("%Y%m%d") + ov_cache = os.path.join(os.environ["OV_CACHE"], date_subfolder) + try: + optimum_intel_version = metadata.version("optimum-intel") + transformers_version = metadata.version("transformers") + ov_cache = os.path.join(ov_cache, f"optimum-intel-{optimum_intel_version}_transformers-{transformers_version}") + except metadata.PackageNotFoundError: + pass + else: + ov_cache = temp_dir.name + return Path(ov_cache).joinpath("wwb_cache") + + +WWB_CACHE_PATH = get_wwb_cache_dir() diff --git a/tools/who_what_benchmark/tests/test_cli_text.py b/tools/who_what_benchmark/tests/test_cli_text.py index c9b0f110c1..d850a547f0 100644 --- a/tools/who_what_benchmark/tests/test_cli_text.py +++ b/tools/who_what_benchmark/tests/test_cli_text.py @@ -149,7 +149,7 @@ def test_text_language(tmp_path): temp_file_name = tmp_path / "gt.csv" run_wwb([ "--base-model", - 'Qwen/Qwen2-0.5B', + "Qwen/Qwen2-0.5B", "--gt-data", temp_file_name, "--num-samples", diff --git a/tools/who_what_benchmark/tests/test_cli_vlm.py b/tools/who_what_benchmark/tests/test_cli_vlm.py index 6625904201..10093db5a0 100644 --- a/tools/who_what_benchmark/tests/test_cli_vlm.py +++ b/tools/who_what_benchmark/tests/test_cli_vlm.py @@ -16,10 +16,10 @@ ("katuni4ka/tiny-random-llava", "visual-text"), ], ) -def test_vlm_basic(model_id, model_type): +def test_vlm_basic(model_id, model_type, tmp_path): if sys.platform == 'darwin': pytest.xfail("Ticket 173169") - GT_FILE = WWB_CACHE_PATH / "gt.csv" + GT_FILE = tmp_path / "gt.csv" MODEL_PATH = WWB_CACHE_PATH.joinpath(model_id.replace("/", "--")) if not MODEL_PATH.exists(): From 31bc592c70f74c2c81cfc437e17d05ecb47f8298 Mon Sep 17 00:00:00 2001 From: Andrei Kashchikhin Date: Tue, 7 Oct 2025 14:29:32 +0200 Subject: [PATCH 13/19] use path to hf model --- tools/who_what_benchmark/tests/test_cli_vlm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/who_what_benchmark/tests/test_cli_vlm.py b/tools/who_what_benchmark/tests/test_cli_vlm.py index 10093db5a0..b86d204cb5 100644 --- a/tools/who_what_benchmark/tests/test_cli_vlm.py +++ b/tools/who_what_benchmark/tests/test_cli_vlm.py @@ -36,7 +36,7 @@ def test_vlm_basic(model_id, model_type, tmp_path): # Collect reference with HF model run_wwb([ "--base-model", - MODEL_PATH, + model_id, "--num-samples", "1", "--gt-data", From f8f74375a8d950b0ea2ae11a1765f459288bccf1 Mon Sep 17 00:00:00 2001 From: Andrei Kashchikhin Date: Tue, 7 Oct 2025 13:32:30 +0100 Subject: [PATCH 14/19] Apply suggestions from code review Co-authored-by: Alexander Suvorov --- tools/who_what_benchmark/tests/constants.py | 2 +- tools/who_what_benchmark/tests/test_cli_image.py | 4 ++-- tools/who_what_benchmark/tests/test_cli_text.py | 4 ++-- tools/who_what_benchmark/tests/test_cli_vlm.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/who_what_benchmark/tests/constants.py b/tools/who_what_benchmark/tests/constants.py index 4da6c98f96..b3a223b024 100644 --- a/tools/who_what_benchmark/tests/constants.py +++ b/tools/who_what_benchmark/tests/constants.py @@ -20,7 +20,7 @@ def get_wwb_cache_dir(temp_dir=tempfile.TemporaryDirectory()) -> Path: pass else: ov_cache = temp_dir.name - return Path(ov_cache).joinpath("wwb_cache") + return Path(ov_cache) / "wwb_cache" WWB_CACHE_PATH = get_wwb_cache_dir() diff --git a/tools/who_what_benchmark/tests/test_cli_image.py b/tools/who_what_benchmark/tests/test_cli_image.py index 402018c7bf..16fa84235e 100644 --- a/tools/who_what_benchmark/tests/test_cli_image.py +++ b/tools/who_what_benchmark/tests/test_cli_image.py @@ -38,7 +38,7 @@ def run_wwb(args): def setup_module(): for model_id in OV_IMAGE_MODELS: - MODEL_PATH = MODEL_CACHE.joinpath(model_id.replace("/", "--")) + MODEL_PATH = MODEL_CACHE / model_id.replace("/", "--") subprocess.run(["optimum-cli", "export", "openvino", "--model", model_id, MODEL_PATH], capture_output=True, text=True) @@ -122,7 +122,7 @@ def test_image_model_genai(model_id, model_type, tmp_path): pytest.xfail("Ticket 173169") GT_FILE = tmp_path / "gt.csv" - MODEL_PATH = MODEL_CACHE.joinpath(model_id.replace("/", "--")) + MODEL_PATH = MODEL_CACHE / model_id.replace("/", "--") MODEL_PATH = MODEL_PATH if MODEL_PATH.exists() else model_id run_wwb([ diff --git a/tools/who_what_benchmark/tests/test_cli_text.py b/tools/who_what_benchmark/tests/test_cli_text.py index d850a547f0..e978c8091b 100644 --- a/tools/who_what_benchmark/tests/test_cli_text.py +++ b/tools/who_what_benchmark/tests/test_cli_text.py @@ -20,8 +20,8 @@ model_id = "facebook/opt-125m" cache_dir = WWB_CACHE_PATH -base_model_path = os.path.join(cache_dir, "opt125m") -target_model_path = os.path.join(cache_dir, "opt125m_int8") +base_model_path = cache_dir / "opt125m" +target_model_path = cache_dir / "opt125m_int8" gptq_model_id = "ybelkada/opt-125m-gptq-4bit" awq_model_id = "TitanML/tiny-mixtral-AWQ-4bit" diff --git a/tools/who_what_benchmark/tests/test_cli_vlm.py b/tools/who_what_benchmark/tests/test_cli_vlm.py index b86d204cb5..a33a93b85d 100644 --- a/tools/who_what_benchmark/tests/test_cli_vlm.py +++ b/tools/who_what_benchmark/tests/test_cli_vlm.py @@ -20,7 +20,7 @@ def test_vlm_basic(model_id, model_type, tmp_path): if sys.platform == 'darwin': pytest.xfail("Ticket 173169") GT_FILE = tmp_path / "gt.csv" - MODEL_PATH = WWB_CACHE_PATH.joinpath(model_id.replace("/", "--")) + MODEL_PATH = WWB_CACHE_PATH / model_id.replace("/", "--") if not MODEL_PATH.exists(): result = subprocess.run(["optimum-cli", "export", From 42d1eb07aa583f253f408f77cd8f0e1bea75efe6 Mon Sep 17 00:00:00 2001 From: Alexander Suvorov Date: Tue, 7 Oct 2025 16:59:12 +0200 Subject: [PATCH 15/19] Update tools/who_what_benchmark/tests/test_cli_vlm.py --- tools/who_what_benchmark/tests/test_cli_vlm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/who_what_benchmark/tests/test_cli_vlm.py b/tools/who_what_benchmark/tests/test_cli_vlm.py index a33a93b85d..8d9186df35 100644 --- a/tools/who_what_benchmark/tests/test_cli_vlm.py +++ b/tools/who_what_benchmark/tests/test_cli_vlm.py @@ -20,7 +20,7 @@ def test_vlm_basic(model_id, model_type, tmp_path): if sys.platform == 'darwin': pytest.xfail("Ticket 173169") GT_FILE = tmp_path / "gt.csv" - MODEL_PATH = WWB_CACHE_PATH / model_id.replace("/", "--") + MODEL_PATH = WWB_CACHE_PATH / model_id.replace("/", "--") if not MODEL_PATH.exists(): result = subprocess.run(["optimum-cli", "export", From 464d2f94976fb83d12840d291a08ce6e6b642b5b Mon Sep 17 00:00:00 2001 From: Andrei Kashchikhin Date: Thu, 9 Oct 2025 12:23:44 +0200 Subject: [PATCH 16/19] rm cache dir --- tools/who_what_benchmark/tests/test_cli_text.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/who_what_benchmark/tests/test_cli_text.py b/tools/who_what_benchmark/tests/test_cli_text.py index d850a547f0..550b7afedb 100644 --- a/tools/who_what_benchmark/tests/test_cli_text.py +++ b/tools/who_what_benchmark/tests/test_cli_text.py @@ -32,15 +32,15 @@ def setup_module(): if not os.path.exists(base_model_path): logger.info("Create models") - tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=WWB_CACHE_PATH) - base_model = OVModelForCausalLM.from_pretrained(model_id, cache_dir=WWB_CACHE_PATH) + tokenizer = AutoTokenizer.from_pretrained(model_id) + base_model = OVModelForCausalLM.from_pretrained(model_id) base_model.save_pretrained(base_model_path) tokenizer.save_pretrained(base_model_path) export_tokenizer(tokenizer, base_model_path) if not os.path.exists(target_model_path): target_model = OVModelForCausalLM.from_pretrained( - model_id, quantization_config=OVWeightQuantizationConfig(bits=8), cache_dir=WWB_CACHE_PATH + model_id, quantization_config=OVWeightQuantizationConfig(bits=8) ) target_model.save_pretrained(target_model_path) tokenizer.save_pretrained(target_model_path) From af170004081c437577f32c3a983f3f16504e1418 Mon Sep 17 00:00:00 2001 From: Andrei Kashchikhin Date: Mon, 13 Oct 2025 10:53:18 +0200 Subject: [PATCH 17/19] save tokenizer to a separate folder --- tools/who_what_benchmark/tests/test_cli_text.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/who_what_benchmark/tests/test_cli_text.py b/tools/who_what_benchmark/tests/test_cli_text.py index 451932735a..3b25d0cbdb 100644 --- a/tools/who_what_benchmark/tests/test_cli_text.py +++ b/tools/who_what_benchmark/tests/test_cli_text.py @@ -29,13 +29,13 @@ def setup_module(): from optimum.exporters.openvino.convert import export_tokenizer - + if not os.path.exists(base_model_path): logger.info("Create models") tokenizer = AutoTokenizer.from_pretrained(model_id) base_model = OVModelForCausalLM.from_pretrained(model_id) base_model.save_pretrained(base_model_path) - tokenizer.save_pretrained(base_model_path) + tokenizer.save_pretrained(base_model_path / "tokenizer") export_tokenizer(tokenizer, base_model_path) if not os.path.exists(target_model_path): @@ -43,7 +43,7 @@ def setup_module(): model_id, quantization_config=OVWeightQuantizationConfig(bits=8) ) target_model.save_pretrained(target_model_path) - tokenizer.save_pretrained(target_model_path) + tokenizer.save_pretrained(target_model_path / "tokenizer") export_tokenizer(tokenizer, target_model_path) From cb3ad5f5131b200230f01a13c322931320df53a8 Mon Sep 17 00:00:00 2001 From: Andrei Kashchikhin Date: Mon, 13 Oct 2025 12:31:57 +0200 Subject: [PATCH 18/19] do not use save pretrained --- tools/who_what_benchmark/tests/test_cli_text.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/who_what_benchmark/tests/test_cli_text.py b/tools/who_what_benchmark/tests/test_cli_text.py index 3b25d0cbdb..7993cd3072 100644 --- a/tools/who_what_benchmark/tests/test_cli_text.py +++ b/tools/who_what_benchmark/tests/test_cli_text.py @@ -29,13 +29,13 @@ def setup_module(): from optimum.exporters.openvino.convert import export_tokenizer - + if not os.path.exists(base_model_path): logger.info("Create models") tokenizer = AutoTokenizer.from_pretrained(model_id) base_model = OVModelForCausalLM.from_pretrained(model_id) base_model.save_pretrained(base_model_path) - tokenizer.save_pretrained(base_model_path / "tokenizer") + # tokenizer.save_pretrained(base_model_path / "tokenizer") export_tokenizer(tokenizer, base_model_path) if not os.path.exists(target_model_path): @@ -43,7 +43,7 @@ def setup_module(): model_id, quantization_config=OVWeightQuantizationConfig(bits=8) ) target_model.save_pretrained(target_model_path) - tokenizer.save_pretrained(target_model_path / "tokenizer") + # tokenizer.save_pretrained(target_model_path / "tokenizer") export_tokenizer(tokenizer, target_model_path) From 6674ebf8e202ef9fe8844fded5e5e5890b16e633 Mon Sep 17 00:00:00 2001 From: Andrei Kashchikhin Date: Mon, 13 Oct 2025 14:47:29 +0200 Subject: [PATCH 19/19] export tokenizer --- tools/who_what_benchmark/tests/test_cli_text.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/who_what_benchmark/tests/test_cli_text.py b/tools/who_what_benchmark/tests/test_cli_text.py index 7993cd3072..a477ca2f88 100644 --- a/tools/who_what_benchmark/tests/test_cli_text.py +++ b/tools/who_what_benchmark/tests/test_cli_text.py @@ -35,16 +35,19 @@ def setup_module(): tokenizer = AutoTokenizer.from_pretrained(model_id) base_model = OVModelForCausalLM.from_pretrained(model_id) base_model.save_pretrained(base_model_path) - # tokenizer.save_pretrained(base_model_path / "tokenizer") - export_tokenizer(tokenizer, base_model_path) + tokenizer.save_pretrained(base_model_path) + else: + tokenizer = AutoTokenizer.from_pretrained(base_model_path) + export_tokenizer(tokenizer, base_model_path) if not os.path.exists(target_model_path): target_model = OVModelForCausalLM.from_pretrained( model_id, quantization_config=OVWeightQuantizationConfig(bits=8) ) + tokenizer = AutoTokenizer.from_pretrained(model_id) target_model.save_pretrained(target_model_path) - # tokenizer.save_pretrained(target_model_path / "tokenizer") - export_tokenizer(tokenizer, target_model_path) + tokenizer.save_pretrained(target_model_path) + export_tokenizer(tokenizer, target_model_path) def teardown_module():