diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index cc8a7f6bf..e6199947f 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -1,4 +1,4 @@ -name: macOS (13, Python 3.10) +name: macOS (14, Python 3.10) on: workflow_dispatch: pull_request: @@ -19,7 +19,7 @@ env: MACOSX_DEPLOYMENT_TARGET: '11.0' PYTHON_VERSION: '3.10' TARGET_BRANCH: ${{ github.base_ref || github.event.merge_group.base_ref || github.ref }} - BASE_PRODUCT_TYPE: public_darwin + BASE_PRODUCT_TYPE: public_macos_arm64 CCACHE_MAXSIZE: 500Mi HF_HOME: ~/.cache/hf OV_CACHE: ~/.cache/ov_cache/194c936 @@ -82,7 +82,8 @@ jobs: - uses: openvinotoolkit/openvino/.github/actions/openvino_provider@master id: openvino_download with: - platform: macos_12_6 + platform: macos_14_7 + arch: 'arm64' commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz revision: latest_available_commit @@ -97,7 +98,7 @@ jobs: defaults: run: shell: bash - runs-on: macos-13 + runs-on: macos-14 env: CMAKE_GENERATOR: Ninja CMAKE_CXX_COMPILER_LAUNCHER: ccache @@ -149,7 +150,7 @@ jobs: with: repos: ${{ env.SRC_DIR }} product_type: ${{ env.BASE_PRODUCT_TYPE }}_${{ matrix.build-type }} - target_arch: 'x86_64' + target_arch: 'arm64' build_type: ${{ matrix.build-type }} save_to: ${{ env.MANIFEST_PATH }} @@ -162,7 +163,7 @@ jobs: cmake -DOpenVINODeveloperPackage_DIR=${{ env.OV_INSTALL_DIR }}/developer_package/cmake \ -DENABLE_PYTHON=ON \ -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \ - -S ${{ env.SRC_DIR}} \ + -S ${{ env.SRC_DIR }} \ -B ${{ env.BUILD_DIR }} cmake --build ${{ env.BUILD_DIR}} --config ${{ matrix.build-type }} --parallel $(nproc) --verbose cmake --install ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --prefix ${{ env.INSTALL_DIR }} @@ -217,7 +218,7 @@ jobs: defaults: run: shell: bash - runs-on: macos-13 + runs-on: macos-14-xlarge env: CCACHE_DIR: ${{ github.workspace }}/ccache CMAKE_CXX_COMPILER_LAUNCHER: ccache @@ -266,7 +267,7 @@ jobs: with: repos: ${{ env.SRC_DIR }} product_type: ${{ env.BASE_PRODUCT_TYPE }}_Release - target_arch: 'x86_64' + target_arch: 'arm64' build_type: Release save_to: ${{ github.workspace }} @@ -318,7 +319,7 @@ jobs: defaults: run: shell: bash - runs-on: macos-13 + runs-on: macos-14 env: OV_INSTALL_DIR: ${{ github.workspace }}/ov INSTALL_DIR: ${{ github.workspace }}/install @@ -370,11 +371,11 @@ jobs: matrix: build-type: [Release] needs: [ openvino_download ] - timeout-minutes: 30 + timeout-minutes: 50 defaults: run: shell: bash - runs-on: macos-13 + runs-on: macos-14 env: SRC_DIR: ${{ github.workspace }}/openvino.genai @@ -406,7 +407,7 @@ jobs: -DENABLE_JS=ON -DCPACK_GENERATOR=NPM \ -DENABLE_PYTHON=OFF -DENABLE_WHEEL=OFF \ -S ${{ env.SRC_DIR }} -B ${{ env.BUILD_DIR }} - cmake --build ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --parallel --verbose + cmake --build ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --parallel $(nproc) --verbose cmake --install ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --prefix ${{ env.INSTALL_DIR }} - name: Upload Node.js bindings Build Package @@ -426,14 +427,20 @@ jobs: matrix: test: - name: 'Whisper' - # TODO: skip some tests temporary untill https://github.com/huggingface/datasets/issues/7647 dataset is fixed + # TODO: skip some tests temporary until https://github.com/huggingface/datasets/issues/7647 dataset is fixed cmd: 'tests/python_tests/test_whisper_pipeline.py -k "not test_smoke[sample_from_dataset0 and not test_whisper_constructors[sample_from_dataset0 and not test_max_new_tokens[sample_from_dataset0 and not test_language_mode[language and not test_task_mode[sample_from_dataset0 and not test_language_autodetect[sample_from_dataset0 and not test_whisper_config_constructor and not test_language_autodetect[sample_from_dataset1 and not test_language_autodetect[sample_from_dataset2 and not test_initial_prompt_hotwords[sample_from_dataset0 and not test_random_sampling[sample_from_dataset0"' run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).whisper.test }} timeout: 120 - - name: 'LLM & VLM' - cmd: 'tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py -p no:cacheprovider' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }} - timeout: 180 + # Only supported on X64 or ARM with SVE support + # - name: 'Cacheopt E2E' + # cmd: 'tests/python_tests/test_kv_cache_eviction.py' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }} + # timeout: 240 + # Only supported on X64 or ARM with SVE support + # - name: 'LLM & VLM' + # cmd: 'tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }} + # timeout: 180 - name: 'GGUF Reader tests' cmd: 'tests/python_tests/test_gguf_reader.py' run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).GGUF.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }} @@ -442,10 +449,11 @@ jobs: cmd: 'tests/python_tests/test_tokenizer.py' run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).tokenizers.test }} timeout: 60 - - name: 'API tests' - cmd: 'tests/python_tests/test_continuous_batching.py tests/python_tests/test_generation_config.py tests/python_tests/test_sampling.py tests/python_tests/test_text_streamer.py' - run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test || fromJSON(needs.smart_ci.outputs.affected_components).sampling.test || fromJSON(needs.smart_ci.outputs.affected_components).text_streamer.test }} - timeout: 60 + # Only supported on X64 or ARM with SVE support + # - name: 'API tests' + # cmd: 'tests/python_tests/test_continuous_batching.py tests/python_tests/test_generation_config.py tests/python_tests/test_sampling.py tests/python_tests/test_text_streamer.py' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test || fromJSON(needs.smart_ci.outputs.affected_components).sampling.test || fromJSON(needs.smart_ci.outputs.affected_components).text_streamer.test }} + # timeout: 60 - name: 'Rag tests' cmd: 'tests/python_tests/test_rag.py' run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).RAG.test }} @@ -457,7 +465,7 @@ jobs: defaults: run: shell: bash - runs-on: macos-13 + runs-on: macos-14 env: INSTALL_DIR: ${{ github.workspace }}/install SRC_DIR: ${{ github.workspace }}/src @@ -536,7 +544,7 @@ jobs: defaults: run: shell: bash - runs-on: macos-13 + runs-on: macos-14 env: INSTALL_DIR: ${{ github.workspace }}/ov SRC_DIR: ${{ github.workspace }}/src @@ -636,7 +644,7 @@ jobs: defaults: run: shell: bash - runs-on: macos-13 + runs-on: macos-14 env: INSTALL_DIR: ${{ github.workspace }}/ov SRC_DIR: ${{ github.workspace }}/src @@ -693,7 +701,7 @@ jobs: defaults: run: shell: bash - runs-on: macos-13 + runs-on: macos-14 env: SRC_DIR: ${{ github.workspace }}/openvino.genai diff --git a/samples/export-requirements.txt b/samples/export-requirements.txt index f01510ea4..0d76aac16 100644 --- a/samples/export-requirements.txt +++ b/samples/export-requirements.txt @@ -10,7 +10,7 @@ diffusers==0.34.0 # For image generation pipelines timm==1.0.19 # For exporting InternVL2 # torchvision for visual language models torchvision==0.17.2; platform_system == "Darwin" and platform_machine == "x86_64" -torchvision==0.23.0+cpu; platform_system != "Darwin" or platform_machine != "x86_64" +torchvision==0.23.0; platform_system != "Darwin" or platform_machine != "x86_64" transformers==4.52.4 # For Whisper hf_transfer==0.1.9 # for faster models download, should used with env var HF_HUB_ENABLE_HF_TRANSFER=1 backoff==2.2.1 # for microsoft/Phi-3.5-vision-instruct diff --git a/tests/python_tests/requirements.txt b/tests/python_tests/requirements.txt index 8692f9ba9..12ef85738 100644 --- a/tests/python_tests/requirements.txt +++ b/tests/python_tests/requirements.txt @@ -18,7 +18,7 @@ langchain-core==0.3.75 einops==0.8.1 # - openbmb/MiniCPM-V-2 torchvision==0.17.2; platform_system == "Darwin" and platform_machine == "x86_64" -torchvision==0.23.0+cpu; platform_system != "Darwin" or platform_machine != "x86_64" +torchvision==0.23.0; platform_system != "Darwin" or platform_machine != "x86_64" # - openbmb/MiniCPM-V-2 timm==1.0.19 # - openai/whisper-base diff --git a/tests/python_tests/test_gguf_reader.py b/tests/python_tests/test_gguf_reader.py index 7451d77aa..d81d5334b 100644 --- a/tests/python_tests/test_gguf_reader.py +++ b/tests/python_tests/test_gguf_reader.py @@ -3,6 +3,7 @@ import pytest +import platform import torch import gc import sys @@ -112,6 +113,8 @@ def test_full_gguf_pipeline(pipeline_type, model_ids, enable_save_ov_model): @pytest.mark.parametrize("pipeline_type", get_gguf_pipeline_types()) @pytest.mark.parametrize("model_ids", [{"gguf_model_id": "Qwen/Qwen3-0.6B-GGUF", "gguf_filename": "Qwen3-0.6B-Q8_0.gguf"}]) +@pytest.mark.xfail(condition=(sys.platform == "darwin" and platform.machine() in ('arm', 'armv7l', + 'aarch64', 'arm64', 'ARM64')), reason="Ticket - 172335") @pytest.mark.precommit def test_full_gguf_qwen3_pipeline(pipeline_type, model_ids): # Temporal testing solution until transformers starts to support qwen3 in GGUF format diff --git a/tests/python_tests/test_whisper_pipeline.py b/tests/python_tests/test_whisper_pipeline.py index 1e5302814..b5ee89c53 100644 --- a/tests/python_tests/test_whisper_pipeline.py +++ b/tests/python_tests/test_whisper_pipeline.py @@ -3,7 +3,9 @@ import openvino_genai as ov_genai import functools +import platform import pytest +import sys import openvino_tokenizers import openvino import datasets @@ -457,6 +459,8 @@ def test_language_autodetect(model_descr, sample_from_dataset): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(n=1)], indirect=True) @pytest.mark.precommit +@pytest.mark.xfail(condition=(sys.platform == "darwin" and platform.machine() in ('arm', 'armv7l', + 'aarch64', 'arm64', 'ARM64')), reason="Ticket - 173169") def test_return_timestamps_short_form(model_descr, sample_from_dataset): run_pipeline_with_ref( model_id=model_descr[0], @@ -469,6 +473,8 @@ def test_return_timestamps_short_form(model_descr, sample_from_dataset): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(n=1)], indirect=True) @pytest.mark.precommit +@pytest.mark.xfail(condition=(sys.platform == "darwin" and platform.machine() in ('arm', 'armv7l', + 'aarch64', 'arm64', 'ARM64')), reason="Ticket - 173169") def test_return_timestamps_max_new_tokens_short_form(model_descr, sample_from_dataset): run_pipeline_with_ref( model_id=model_descr[0], @@ -483,6 +489,8 @@ def test_return_timestamps_max_new_tokens_short_form(model_descr, sample_from_da @pytest.mark.parametrize("model_descr", get_whisper_models_list()) @pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(n=10, long_form=True)], indirect=True) @pytest.mark.precommit +@pytest.mark.xfail(condition=(sys.platform == "darwin" and platform.machine() in ('arm', 'armv7l', + 'aarch64', 'arm64', 'ARM64')), reason="Ticket - 173169") def test_longform_audio(model_descr, sample_from_dataset): _, _, hf_pipe, genai_pipe = read_whisper_model(model_descr) @@ -509,6 +517,8 @@ def test_longform_audio(model_descr, sample_from_dataset): @pytest.mark.parametrize("model_descr", get_whisper_models_list()) @pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(n=2, long_form=True)], indirect=True) @pytest.mark.precommit +@pytest.mark.xfail(condition=(sys.platform == "darwin" and platform.machine() in ('arm', 'armv7l', + 'aarch64', 'arm64', 'ARM64')), reason="Ticket - 173169") def test_longform_audio_with_past(model_descr, sample_from_dataset): _, _, hf_pipe, genai_pipe = read_whisper_model(model_descr, stateful=True) @@ -534,6 +544,8 @@ def test_longform_audio_with_past(model_descr, sample_from_dataset): @pytest.mark.parametrize("model_descr", get_whisper_models_list()) @pytest.mark.precommit +@pytest.mark.xfail(condition=(sys.platform == "darwin" and platform.machine() in ('arm', 'armv7l', + 'aarch64', 'arm64', 'ARM64')), reason="Ticket - 173169") def test_shortform(model_descr): samples = [] ds = datasets.load_dataset( @@ -553,6 +565,8 @@ def test_shortform(model_descr): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(n=2, long_form=True)], indirect=True) @pytest.mark.precommit +@pytest.mark.xfail(condition=(sys.platform == "darwin" and platform.machine() in ('arm', 'armv7l', + 'aarch64', 'arm64', 'ARM64')), reason="Ticket - 173169") def test_beam_search(model_descr, sample_from_dataset): # use only 30 seconds of audio due to beam search results wrong with enabled timestamps # ticket: 167239 @@ -631,6 +645,8 @@ def test_random_sampling(model_descr, sample_from_dataset): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("sample_from_dataset", [{"language" : "en", "sample_id": 0}], indirect=True) @pytest.mark.precommit +@pytest.mark.xfail(condition=(sys.platform == "darwin" and platform.machine() in ('arm', 'armv7l', + 'aarch64', 'arm64', 'ARM64')), reason="Ticket - 173169") def test_perf_metrics(model_descr, sample_from_dataset): model_id, path, hf_pipe, genai_pipe = read_whisper_model(model_descr) diff --git a/tools/who_what_benchmark/tests/test_cli_image.py b/tools/who_what_benchmark/tests/test_cli_image.py index b354056da..0536fc044 100644 --- a/tools/who_what_benchmark/tests/test_cli_image.py +++ b/tools/who_what_benchmark/tests/test_cli_image.py @@ -1,7 +1,9 @@ import itertools import subprocess # nosec B404 import os +import sys import shutil +import platform import pytest import logging import tempfile @@ -108,7 +110,17 @@ def test_image_model_genai(model_id, model_type, tmp_path): pytest.skip(reason="FLUX-Fill is supported as inpainting only") if model_type == "image-inpainting": pytest.xfail("Segfault. Ticket 170877") - + + mac_arm64_skip = ('stable-diffusion-xl-image-to-image' in model_id or + 'stable-diffusion-3-tiny-random-image-to-image' in model_id or + 'stable-diffusion-3-tiny-random-text-to-image' in model_id or + 'tiny-random-flux' in model_id) + + if mac_arm64_skip \ + and (sys.platform == "darwin") \ + and (platform.machine() in ('arm', 'armv7l', 'aarch64', 'arm64', 'ARM64')): + pytest.xfail("Ticket 173169") + GT_FILE = tmp_path / "gt.csv" MODEL_PATH = os.path.join(MODEL_CACHE, model_id.replace("/", "--")) diff --git a/tools/who_what_benchmark/tests/test_cli_text.py b/tools/who_what_benchmark/tests/test_cli_text.py index f564d53c0..e40c2d4fa 100644 --- a/tools/who_what_benchmark/tests/test_cli_text.py +++ b/tools/who_what_benchmark/tests/test_cli_text.py @@ -1,6 +1,7 @@ import os import shutil import tempfile +import platform import pandas as pd import pytest import logging @@ -49,6 +50,7 @@ def teardown_module(): shutil.rmtree(tmp_dir) +@pytest.mark.skipif((sys.platform == "darwin") and (platform.machine() in ('arm', 'armv7l', 'aarch64', 'arm64', 'ARM64')), reason='173169') def test_text_target_model(): run_wwb([ "--base-model",