Commit e0d98b1
Upgrade optimum-intel and transformers (#2611)
1 parent 744c69f

File tree: 12 files changed, +35 −25 lines

.github/workflows/linux.yml
Lines changed: 3 additions & 3 deletions

@@ -25,7 +25,7 @@ env:
   SCCACHE_CACHE_SIZE: 30G
   SCCACHE_AZURE_KEY_PREFIX: genai/ubuntu/22_04/x64
   HF_HOME: /mount/caches/huggingface/lin
-  OV_CACHE: /mount/caches/huggingface/.ov_cache/lin
+  OV_CACHE: /mount/caches/huggingface/.ov_cache/lin/
   OPENVINO_LOG_LEVEL: 4
   GENAI_ARCHIVE_NAME: genai.tar.gz
   GENAI_SAMPLES_NAME: genai_samples.tar.gz
@@ -520,7 +520,7 @@ jobs:
   - name: 'Cacheopt E2E (Part 2)'
     cmd: 'python -m pytest -v ./tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_2.py'
     run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }}
-    timeout: 240
+    timeout: 360
   - name: 'LLM & VLM'
     cmd: 'python -m pytest -v ./tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py ./tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py --override-ini cache_dir=/mount/caches/pytest/'
     run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }}
@@ -544,7 +544,7 @@ jobs:
   - name: 'WWB tests'
     cmd: 'python -m pytest -v ./tools/who_what_benchmark/tests -m "not nanollava"'
     run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }}
-    timeout: 90
+    timeout: 120
   - name: 'WWB tests (nanollava)'
     cmd: |
       python -m pip install transformers==4.48.0

.github/workflows/mac.yml
Lines changed: 2 additions & 9 deletions

@@ -22,7 +22,7 @@ env:
   BASE_PRODUCT_TYPE: public_macos_arm64
   CCACHE_MAXSIZE: 500Mi
   HF_HOME: ~/.cache/hf
-  OV_CACHE: ~/.cache/ov_cache/194c936
+  OV_CACHE: ~/.cache/ov_cache/
   CLEANUP_CACHE: 1
   OPENVINO_LOG_LEVEL: 4

@@ -470,7 +470,7 @@ jobs:
   - name: 'WWB tests'
     cmd: 'python -m pytest -v ./tools/who_what_benchmark/tests -m "not nanollava"'
     run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }}
-    timeout: 180
+    timeout: 120
   - name: 'WWB tests (nanollava)'
     cmd: |
       python -m pip install transformers==4.48.0
@@ -518,13 +518,6 @@ jobs:
     requirements_files: "${{ env.SRC_DIR }}/tests/python_tests/requirements.txt"
     local_wheel_dir: ${{ env.INSTALL_DIR }}/wheels

-  # transformers >= 4.52 require torch >= 2.6 and raise an error otherwise:
-  # ValueError: Due to a serious vulnerability issue in `torch.load`, even with `weights_only=True`, we now require users to upgrade torch to at least v2.6 in order to use the function. This version restriction does not apply when loading files with safetensors.
-  # See the vulnerability report here https://nvd.nist.gov/vuln/detail/CVE-2025-32434
-
-  # x86_64 macOS does not (and will not) support newer versions of torch > 2.2 which are used in the newer transformers versions. It's not possible to lower transformer version in requirements.txt because that triggers vulnerability alert: https://github.com/openvinotoolkit/openvino_tokenizers/security/dependabot/11
-  - run: python -m pip install "transformers<4.52"
-
   - name: Tests
     if: ${{ matrix.test.run_condition }}
     run: ${{ matrix.test.cmd }}

.github/workflows/manylinux_2_28.yml
Lines changed: 3 additions & 3 deletions

@@ -25,7 +25,7 @@ env:
   SCCACHE_CACHE_SIZE: 30G
   SCCACHE_AZURE_KEY_PREFIX: genai/manylinux_2_28
   HF_HOME: /mount/caches/huggingface/lin
-  OV_CACHE: /mount/caches/huggingface/.ov_cache/lin
+  OV_CACHE: /mount/caches/huggingface/.ov_cache/lin/
   OPENVINO_LOG_LEVEL: 4
   GENAI_ARCHIVE_NAME: genai.tar.gz
   GENAI_SAMPLES_NAME: genai_samples.tar.gz
@@ -458,7 +458,7 @@ jobs:
   - name: 'Cacheopt E2E (Part 2)'
     cmd: 'python -m pytest -v ./tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_2.py'
     run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }}
-    timeout: 240
+    timeout: 360
   - name: 'LLM & VLM'
     cmd: 'python -m pytest -v ./tests/python_tests/test_llm_pipeline.py ./tests/python_tests/test_llm_pipeline_static.py ./tests/python_tests/test_vlm_pipeline.py ./tests/python_tests/test_structured_output.py --override-ini cache_dir=/mount/caches/pytest/'
     run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }}
@@ -482,7 +482,7 @@ jobs:
   - name: 'WWB tests'
     cmd: 'python -m pytest -v ./tools/who_what_benchmark/tests -m "not nanollava"'
     run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }}
-    timeout: 180
+    timeout: 120
   - name: 'WWB tests (nanollava)'
     cmd: |
       python -m pip install transformers==4.48.0

.github/workflows/windows.yml
Lines changed: 2 additions & 2 deletions

@@ -621,7 +621,7 @@ jobs:
   - name: 'Cacheopt E2E (Part 2)'
     cmd: 'python -m pytest -s -v tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_2.py'
     run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }}
-    timeout: 240
+    timeout: 360
   - name: 'LLM & VLM'
     cmd: 'python -m pytest -s -v tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py --override-ini cache_dir=/mount/caches/pytest/'
     run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }}
@@ -645,7 +645,7 @@ jobs:
   - name: 'WWB tests'
     cmd: 'python -m pytest -s -v tools/who_what_benchmark/tests -m "not nanollava"'
     run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }}
-    timeout: 180
+    timeout: 120
   - name: 'WWB tests (nanollava)'
     cmd: |
       python -m pip install transformers==4.48.0

samples/export-requirements.txt
Lines changed: 2 additions & 2 deletions

@@ -1,7 +1,7 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
 --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
 openvino-tokenizers[transformers]~=2025.4.0.0.dev
-optimum-intel[nncf]==1.25.2
+optimum-intel[nncf]==1.26.0
 numpy==1.26.4; platform_system == "Darwin" and platform_machine == "x86_64"
 safetensors==0.6.2; platform_system == "Darwin" and platform_machine == "x86_64"
 einops==0.8.1 # For Qwen
@@ -12,7 +12,7 @@ timm==1.0.20 # For exporting InternVL2
 torch==2.8.0
 torchvision==0.17.2; platform_system == "Darwin" and platform_machine == "x86_64"
 torchvision==0.23.0; platform_system != "Darwin" or platform_machine != "x86_64"
-transformers==4.53.3 # For Whisper
+transformers==4.55.4 # For Whisper
 hf_transfer==0.1.9 # for faster models download, should used with env var HF_HUB_ENABLE_HF_TRANSFER=1
 backoff==2.2.1 # for microsoft/Phi-3.5-vision-instruct
 peft==0.17.1 # For microsoft/Phi-4-multimodal-instruct

tests/python_tests/requirements.txt
Lines changed: 2 additions & 2 deletions

@@ -1,10 +1,10 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
 diffusers==0.35.2
-optimum-intel @ git+https://github.com/huggingface/optimum-intel.git@04db016571d1a19c14918553365ee4c05c8b4697
+optimum-intel==1.26.0
 numpy==1.26.4; platform_system == "Darwin" and platform_machine == "x86_64"
 safetensors==0.6.2; platform_system == "Darwin" and platform_machine == "x86_64"
 pytest==8.4.2
-transformers==4.53.3
+transformers==4.55.4
 hf_transfer==0.1.9
 gguf==0.17.1
 torch==2.8.0

tests/python_tests/samples/test_text2speech.py
Lines changed: 1 addition & 0 deletions

@@ -2,6 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0

 import os
+import subprocess # nosec B404
 import sys
 import tempfile

tests/python_tests/test_vlm_pipeline.py
Lines changed: 11 additions & 1 deletion

@@ -178,6 +178,10 @@ def _setup_generation_config(


 def _get_ov_model(model_id: str) -> str:
+    if model_id in {"katuni4ka/tiny-random-phi-4-multimodal", "qnguyen3/nanoLLaVA"}:
+        pytest.skip("ValueError: The current version of Transformers does not allow for the export of the model. Maximum required is 4.53.3, got: 4.55.4")
+    if "katuni4ka/tiny-random-phi3-vision" == model_id:
+        pytest.xfail("AttributeError: 'DynamicCache' object has no attribute 'get_usable_length'. Ticket CVS-175110")
     ov_cache_models_dir = get_ov_cache_models_dir()
     dir_name = str(model_id).replace(os.sep, "_")
     model_dir = ov_cache_models_dir / dir_name
@@ -198,7 +202,13 @@ def _get_ov_model(model_id: str) -> str:
             device="CPU",
             export=True,
             load_in_8bit=False,
-            trust_remote_code=True,
+            trust_remote_code=model_id in {
+                "katuni4ka/tiny-random-minicpmv-2_6",
+                "katuni4ka/tiny-random-internvl2",
+                "katuni4ka/tiny-random-phi3-vision",
+                "katuni4ka/tiny-random-phi-4-multimodal",
+                "qnguyen3/nanoLLaVA",
+            },
         )
     )
     if model.config.model_type == "llava-qwen2":
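Aside: a minimal sketch of the skip/xfail pattern the helper now relies on (the helper and set names below are illustrative; only the model IDs and reasons come from this diff). Both pytest.skip() and pytest.xfail() raise immediately, so every test that routes through the helper with an affected model is reported without the export ever being attempted:

import pytest

# Illustrative names; the model IDs and reasons are from the diff above.
UNEXPORTABLE = {"katuni4ka/tiny-random-phi-4-multimodal", "qnguyen3/nanoLLaVA"}

def _guard(model_id: str) -> None:
    if model_id in UNEXPORTABLE:
        # Raises immediately; the calling test is reported as skipped.
        pytest.skip("transformers 4.55.4 cannot export this model")
    if model_id == "katuni4ka/tiny-random-phi3-vision":
        # Also aborts the test, reporting it as an expected failure.
        pytest.xfail("'DynamicCache' has no attribute 'get_usable_length' (CVS-175110)")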

tests/python_tests/utils/hugging_face.py
Lines changed: 2 additions & 2 deletions

@@ -166,8 +166,8 @@ def run_hugging_face(

 # download HF model or read converted model
 def get_huggingface_models(model_id: str | Path, model_class: Type[OVModel], local_files_only=False):
-    hf_tokenizer = retry_request(lambda: AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, local_files_only=local_files_only))
-    opt_model = retry_request(lambda: model_class.from_pretrained(model_id, export=isinstance(model_id, str), compile=False, load_in_8bit=False, trust_remote_code=isinstance(model_id, str), ov_config=get_default_llm_properties(), local_files_only=local_files_only))
+    hf_tokenizer = retry_request(lambda: AutoTokenizer.from_pretrained(model_id, local_files_only=local_files_only))
+    opt_model = retry_request(lambda: model_class.from_pretrained(model_id, export=isinstance(model_id, str), compile=False, load_in_8bit=False, ov_config=get_default_llm_properties(), local_files_only=local_files_only))
     return opt_model, hf_tokenizer

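Note: with trust_remote_code dropped, both calls fall back to the transformers default (disabled), so repositories that ship custom modeling code must now opt in explicitly, as test_vlm_pipeline.py does above. A minimal illustration (the model ID is just an example of a standard repo):

from transformers import AutoTokenizer

# trust_remote_code defaults to False; loading a repo that requires custom
# code without the flag raises an error instead of executing that code.
tok = AutoTokenizer.from_pretrained("gpt2")  # standard repo: no flag needed
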
tools/who_what_benchmark/tests/test_cli_embeddings.py
Lines changed: 4 additions & 1 deletion

@@ -1,4 +1,5 @@
 import subprocess # nosec B404
+import sys
 import pytest
 import logging
 from test_cli_image import run_wwb
@@ -11,7 +12,9 @@
 @pytest.mark.parametrize(
     ("model_id", "model_type"),
     [
-        ("BAAI/bge-small-en-v1.5", "text-embedding"),
+        pytest.param("BAAI/bge-small-en-v1.5", "text-embedding", marks=pytest.mark.xfail(
+            sys.platform == 'darwin', reason="Hangs. Ticket 175534", run=False
+        )),
         ("Qwen/Qwen3-Embedding-0.6B", "text-embedding"),
     ],
 )
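Aside: a minimal, self-contained sketch of the conditional-xfail pattern used above (the test body is a stand-in, not the real WWB test). Passing run=False tells pytest not to execute the test at all when the condition holds, which is the right tool for a hang: an ordinary xfail would still run the body, and still hang.

import sys
import pytest

@pytest.mark.parametrize(
    "model_id",
    [
        pytest.param(
            "BAAI/bge-small-en-v1.5",
            # run=False: on macOS the body is never executed, so a hang
            # cannot stall the CI job; the case is reported as xfailed.
            marks=pytest.mark.xfail(sys.platform == "darwin",
                                    reason="Hangs. Ticket 175534", run=False),
        ),
    ],
)
def test_embedding_smoke(model_id):  # stand-in body
    assert model_id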
