diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml
index cc8a7f6bf..e6199947f 100644
--- a/.github/workflows/mac.yml
+++ b/.github/workflows/mac.yml
@@ -1,4 +1,4 @@
-name: macOS (13, Python 3.10)
+name: macOS (14, Python 3.10)
 on:
   workflow_dispatch:
   pull_request:
@@ -19,7 +19,7 @@ env:
   MACOSX_DEPLOYMENT_TARGET: '11.0'
   PYTHON_VERSION: '3.10'
   TARGET_BRANCH: ${{ github.base_ref || github.event.merge_group.base_ref || github.ref }}
-  BASE_PRODUCT_TYPE: public_darwin
+  BASE_PRODUCT_TYPE: public_macos_arm64
   CCACHE_MAXSIZE: 500Mi
   HF_HOME: ~/.cache/hf
   OV_CACHE: ~/.cache/ov_cache/194c936
@@ -82,7 +82,8 @@ jobs:
     - uses: openvinotoolkit/openvino/.github/actions/openvino_provider@master
       id: openvino_download
       with:
-        platform: macos_12_6
+        platform: macos_14_7
+        arch: 'arm64'
         commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz
         revision: latest_available_commit
 
@@ -97,7 +98,7 @@ jobs:
     defaults:
       run:
         shell: bash
-    runs-on: macos-13
+    runs-on: macos-14
     env:
       CMAKE_GENERATOR: Ninja
       CMAKE_CXX_COMPILER_LAUNCHER: ccache
@@ -149,7 +150,7 @@ jobs:
         with:
           repos: ${{ env.SRC_DIR }}
           product_type: ${{ env.BASE_PRODUCT_TYPE }}_${{ matrix.build-type }}
-          target_arch: 'x86_64'
+          target_arch: 'arm64'
           build_type: ${{ matrix.build-type }}
           save_to: ${{ env.MANIFEST_PATH }}
 
@@ -162,7 +163,7 @@ jobs:
           cmake -DOpenVINODeveloperPackage_DIR=${{ env.OV_INSTALL_DIR }}/developer_package/cmake \
                 -DENABLE_PYTHON=ON \
                 -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \
-                -S ${{ env.SRC_DIR}} \
+                -S ${{ env.SRC_DIR }} \
                 -B ${{ env.BUILD_DIR }}
           cmake --build ${{ env.BUILD_DIR}} --config ${{ matrix.build-type }} --parallel $(nproc) --verbose
           cmake --install ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --prefix ${{ env.INSTALL_DIR }}
@@ -217,7 +218,7 @@ jobs:
     defaults:
       run:
         shell: bash
-    runs-on: macos-13
+    runs-on: macos-14-xlarge
     env:
       CCACHE_DIR: ${{ github.workspace }}/ccache
       CMAKE_CXX_COMPILER_LAUNCHER: ccache
@@ -266,7 +267,7 @@ jobs:
         with:
           repos: ${{ env.SRC_DIR }}
           product_type: ${{ env.BASE_PRODUCT_TYPE }}_Release
-          target_arch: 'x86_64'
+          target_arch: 'arm64'
           build_type: Release
           save_to: ${{ github.workspace }}
 
@@ -318,7 +319,7 @@ jobs:
     defaults:
       run:
         shell: bash
-    runs-on: macos-13
+    runs-on: macos-14
     env:
       OV_INSTALL_DIR: ${{ github.workspace }}/ov
       INSTALL_DIR: ${{ github.workspace }}/install
@@ -370,11 +371,11 @@ jobs:
       matrix:
         build-type: [Release]
     needs: [ openvino_download ]
-    timeout-minutes: 30
+    timeout-minutes: 50
     defaults:
       run:
         shell: bash
-    runs-on: macos-13
+    runs-on: macos-14
 
     env:
       SRC_DIR: ${{ github.workspace }}/openvino.genai
@@ -406,7 +407,7 @@ jobs:
             -DENABLE_JS=ON -DCPACK_GENERATOR=NPM \
             -DENABLE_PYTHON=OFF -DENABLE_WHEEL=OFF \
             -S ${{ env.SRC_DIR }} -B ${{ env.BUILD_DIR }}
-          cmake --build ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --parallel --verbose
+          cmake --build ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --parallel $(sysctl -n hw.logicalcpu) --verbose
           cmake --install ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --prefix ${{ env.INSTALL_DIR }}
 
       - name: Upload Node.js bindings Build Package
@@ -426,14 +427,20 @@ jobs:
       matrix:
         test:
           - name: 'Whisper'
-            # TODO: skip some tests temporary untill https://github.com/huggingface/datasets/issues/7647 dataset is fixed
+            # TODO: temporarily skip some tests until the dataset issue https://github.com/huggingface/datasets/issues/7647 is fixed
             cmd: 'tests/python_tests/test_whisper_pipeline.py -k "not test_smoke[sample_from_dataset0 and not test_whisper_constructors[sample_from_dataset0 and not test_max_new_tokens[sample_from_dataset0 and not test_language_mode[language and not test_task_mode[sample_from_dataset0 and not test_language_autodetect[sample_from_dataset0 and not test_whisper_config_constructor and not test_language_autodetect[sample_from_dataset1 and not test_language_autodetect[sample_from_dataset2 and not test_initial_prompt_hotwords[sample_from_dataset0 and not test_random_sampling[sample_from_dataset0"'
             run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).whisper.test }}
             timeout: 120
-          - name: 'LLM & VLM'
-            cmd: 'tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py -p no:cacheprovider'
-            run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }}
-            timeout: 180
+          # Disabled on the macOS arm64 runner: only supported on x64 or ARM with SVE support
+          # - name: 'Cacheopt E2E'
+          #   cmd: 'tests/python_tests/test_kv_cache_eviction.py'
+          #   run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }}
+          #   timeout: 240
+          # Disabled on the macOS arm64 runner: only supported on x64 or ARM with SVE support
+          # - name: 'LLM & VLM'
+          #   cmd: 'tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py'
+          #   run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }}
+          #   timeout: 180
           - name: 'GGUF Reader tests'
             cmd: 'tests/python_tests/test_gguf_reader.py'
             run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).GGUF.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }}
@@ -442,10 +449,11 @@ jobs:
             cmd: 'tests/python_tests/test_tokenizer.py'
             run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).tokenizers.test }}
             timeout: 60
-          - name: 'API tests'
-            cmd: 'tests/python_tests/test_continuous_batching.py tests/python_tests/test_generation_config.py tests/python_tests/test_sampling.py tests/python_tests/test_text_streamer.py'
-            run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test || fromJSON(needs.smart_ci.outputs.affected_components).sampling.test || fromJSON(needs.smart_ci.outputs.affected_components).text_streamer.test }}
-            timeout: 60
+          # Disabled on the macOS arm64 runner: only supported on x64 or ARM with SVE support
+          # - name: 'API tests'
+          #   cmd: 'tests/python_tests/test_continuous_batching.py tests/python_tests/test_generation_config.py tests/python_tests/test_sampling.py tests/python_tests/test_text_streamer.py'
+          #   run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test || fromJSON(needs.smart_ci.outputs.affected_components).sampling.test || fromJSON(needs.smart_ci.outputs.affected_components).text_streamer.test }}
+          #   timeout: 60
           - name: 'Rag tests'
             cmd: 'tests/python_tests/test_rag.py'
             run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).RAG.test }}
@@ -457,7 +465,7 @@ jobs:
     defaults:
       run:
         shell: bash
-    runs-on: macos-13
+    runs-on: macos-14
     env:
       INSTALL_DIR: ${{ github.workspace }}/install
       SRC_DIR: ${{ github.workspace }}/src
@@ -536,7 +544,7 @@ jobs:
     defaults:
       run:
         shell: bash
-    runs-on: macos-13
+    runs-on: macos-14
     env:
       INSTALL_DIR: ${{ github.workspace }}/ov
       SRC_DIR: ${{ github.workspace }}/src
@@ -636,7 +644,7 @@ jobs:
     defaults:
       run:
         shell: bash
-    runs-on: macos-13
+    runs-on: macos-14
     env:
       INSTALL_DIR: ${{ github.workspace }}/ov
       SRC_DIR: ${{ github.workspace }}/src
@@ -693,7 +701,7 @@ jobs:
     defaults:
       run:
         shell: bash
-    runs-on: macos-13
+    runs-on: macos-14
 
     env:
       SRC_DIR: ${{ github.workspace }}/openvino.genai
diff --git a/samples/export-requirements.txt b/samples/export-requirements.txt
index f01510ea4..0d76aac16 100644
--- a/samples/export-requirements.txt
+++ b/samples/export-requirements.txt
@@ -10,7 +10,7 @@ diffusers==0.34.0 # For image generation pipelines
 timm==1.0.19  # For exporting InternVL2
 # torchvision for visual language models
 torchvision==0.17.2; platform_system == "Darwin" and platform_machine == "x86_64"
-torchvision==0.23.0+cpu; platform_system != "Darwin" or platform_machine != "x86_64"
+torchvision==0.23.0; platform_system != "Darwin" or platform_machine != "x86_64"
 transformers==4.52.4 # For Whisper
 hf_transfer==0.1.9  # for faster models download, should used with env var HF_HUB_ENABLE_HF_TRANSFER=1
 backoff==2.2.1  # for microsoft/Phi-3.5-vision-instruct
diff --git a/tests/python_tests/requirements.txt b/tests/python_tests/requirements.txt
index 8692f9ba9..12ef85738 100644
--- a/tests/python_tests/requirements.txt
+++ b/tests/python_tests/requirements.txt
@@ -18,7 +18,7 @@ langchain-core==0.3.75
 einops==0.8.1
 # - openbmb/MiniCPM-V-2
 torchvision==0.17.2; platform_system == "Darwin" and platform_machine == "x86_64"
-torchvision==0.23.0+cpu; platform_system != "Darwin" or platform_machine != "x86_64"
+torchvision==0.23.0; platform_system != "Darwin" or platform_machine != "x86_64"
 # - openbmb/MiniCPM-V-2
 timm==1.0.19
 # - openai/whisper-base
diff --git a/tests/python_tests/test_gguf_reader.py b/tests/python_tests/test_gguf_reader.py
index 7451d77aa..d81d5334b 100644
--- a/tests/python_tests/test_gguf_reader.py
+++ b/tests/python_tests/test_gguf_reader.py
@@ -3,6 +3,7 @@
 
 
 import pytest
+import platform
 import torch
 import gc
 import sys
@@ -112,6 +113,8 @@ def test_full_gguf_pipeline(pipeline_type, model_ids, enable_save_ov_model):
 
 @pytest.mark.parametrize("pipeline_type", get_gguf_pipeline_types())
 @pytest.mark.parametrize("model_ids", [{"gguf_model_id": "Qwen/Qwen3-0.6B-GGUF", "gguf_filename": "Qwen3-0.6B-Q8_0.gguf"}])
+@pytest.mark.xfail(condition=(sys.platform == "darwin" and platform.machine() in ('arm', 'armv7l',
+                                                                                  'aarch64', 'arm64', 'ARM64')), reason="Ticket - 172335")
 @pytest.mark.precommit
 def test_full_gguf_qwen3_pipeline(pipeline_type, model_ids):
     # Temporal testing solution until transformers starts to support qwen3 in GGUF format
diff --git a/tests/python_tests/test_whisper_pipeline.py b/tests/python_tests/test_whisper_pipeline.py
index 1e5302814..b5ee89c53 100644
--- a/tests/python_tests/test_whisper_pipeline.py
+++ b/tests/python_tests/test_whisper_pipeline.py
@@ -3,7 +3,9 @@
 
 import openvino_genai as ov_genai
 import functools
+import platform
 import pytest
+import sys
 import openvino_tokenizers
 import openvino
 import datasets
@@ -457,6 +459,8 @@ def test_language_autodetect(model_descr, sample_from_dataset):
 @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
 @pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(n=1)], indirect=True)
 @pytest.mark.precommit
+@pytest.mark.xfail(condition=(sys.platform == "darwin" and platform.machine() in ('arm', 'armv7l',
+                                                                                  'aarch64', 'arm64', 'ARM64')), reason="Ticket - 173169")
 def test_return_timestamps_short_form(model_descr, sample_from_dataset):
     run_pipeline_with_ref(
         model_id=model_descr[0],
@@ -469,6 +473,8 @@ def test_return_timestamps_short_form(model_descr, sample_from_dataset):
 @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
 @pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(n=1)], indirect=True)
 @pytest.mark.precommit
+@pytest.mark.xfail(condition=(sys.platform == "darwin" and platform.machine() in ('arm', 'armv7l',
+                                                                                  'aarch64', 'arm64', 'ARM64')), reason="Ticket - 173169")
 def test_return_timestamps_max_new_tokens_short_form(model_descr, sample_from_dataset):
     run_pipeline_with_ref(
         model_id=model_descr[0],
@@ -483,6 +489,8 @@ def test_return_timestamps_max_new_tokens_short_form(model_descr, sample_from_da
 @pytest.mark.parametrize("model_descr", get_whisper_models_list())
 @pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(n=10, long_form=True)], indirect=True)
 @pytest.mark.precommit
+@pytest.mark.xfail(condition=(sys.platform == "darwin" and platform.machine() in ('arm', 'armv7l',
+                                                                                  'aarch64', 'arm64', 'ARM64')), reason="Ticket - 173169")
 def test_longform_audio(model_descr, sample_from_dataset):
     _, _, hf_pipe, genai_pipe = read_whisper_model(model_descr)
 
@@ -509,6 +517,8 @@ def test_longform_audio(model_descr, sample_from_dataset):
 @pytest.mark.parametrize("model_descr", get_whisper_models_list())
 @pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(n=2, long_form=True)], indirect=True)
 @pytest.mark.precommit
+@pytest.mark.xfail(condition=(sys.platform == "darwin" and platform.machine() in ('arm', 'armv7l',
+                                                                                  'aarch64', 'arm64', 'ARM64')), reason="Ticket - 173169")
 def test_longform_audio_with_past(model_descr, sample_from_dataset):
     _, _, hf_pipe, genai_pipe = read_whisper_model(model_descr, stateful=True)
 
@@ -534,6 +544,8 @@ def test_longform_audio_with_past(model_descr, sample_from_dataset):
 
 @pytest.mark.parametrize("model_descr", get_whisper_models_list())
 @pytest.mark.precommit
+@pytest.mark.xfail(condition=(sys.platform == "darwin" and platform.machine() in ('arm', 'armv7l',
+                                                                                  'aarch64', 'arm64', 'ARM64')), reason="Ticket - 173169")
 def test_shortform(model_descr):
     samples = []
     ds = datasets.load_dataset(
@@ -553,6 +565,8 @@ def test_shortform(model_descr):
 @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
 @pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(n=2, long_form=True)], indirect=True)
 @pytest.mark.precommit
+@pytest.mark.xfail(condition=(sys.platform == "darwin" and platform.machine() in ('arm', 'armv7l',
+                                                                                  'aarch64', 'arm64', 'ARM64')), reason="Ticket - 173169")
 def test_beam_search(model_descr, sample_from_dataset):
     # use only 30 seconds of audio due to beam search results wrong with enabled timestamps
     # ticket: 167239
@@ -631,6 +645,8 @@ def test_random_sampling(model_descr, sample_from_dataset):
 @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
 @pytest.mark.parametrize("sample_from_dataset", [{"language" : "en", "sample_id": 0}], indirect=True)
 @pytest.mark.precommit
+@pytest.mark.xfail(condition=(sys.platform == "darwin" and platform.machine() in ('arm', 'armv7l',
+                                                                                  'aarch64', 'arm64', 'ARM64')), reason="Ticket - 173169")
 def test_perf_metrics(model_descr, sample_from_dataset):
     model_id, path, hf_pipe, genai_pipe = read_whisper_model(model_descr)
 
diff --git a/tools/who_what_benchmark/tests/test_cli_image.py b/tools/who_what_benchmark/tests/test_cli_image.py
index b354056da..0536fc044 100644
--- a/tools/who_what_benchmark/tests/test_cli_image.py
+++ b/tools/who_what_benchmark/tests/test_cli_image.py
@@ -1,7 +1,9 @@
 import itertools
 import subprocess  # nosec B404
 import os
+import sys
 import shutil
+import platform
 import pytest
 import logging
 import tempfile
@@ -108,7 +110,19 @@ def test_image_model_genai(model_id, model_type, tmp_path):
         pytest.skip(reason="FLUX-Fill is supported as inpainting only")
     if model_type == "image-inpainting":
         pytest.xfail("Segfault. Ticket 170877")
-
+
+    # Known failures on macOS/ARM for the model ids below (Ticket 173169).
+    mac_arm64_xfail_models = (
+        'stable-diffusion-xl-image-to-image',
+        'stable-diffusion-3-tiny-random-image-to-image',
+        'stable-diffusion-3-tiny-random-text-to-image',
+        'tiny-random-flux',
+    )
+    is_mac_arm = (sys.platform == "darwin"
+                  and platform.machine() in ('arm', 'armv7l', 'aarch64', 'arm64', 'ARM64'))
+    if is_mac_arm and any(name in model_id for name in mac_arm64_xfail_models):
+        pytest.xfail("Ticket 173169")
+
     GT_FILE = tmp_path / "gt.csv"
     MODEL_PATH = os.path.join(MODEL_CACHE, model_id.replace("/", "--"))
 
diff --git a/tools/who_what_benchmark/tests/test_cli_text.py b/tools/who_what_benchmark/tests/test_cli_text.py
index f564d53c0..e40c2d4fa 100644
--- a/tools/who_what_benchmark/tests/test_cli_text.py
+++ b/tools/who_what_benchmark/tests/test_cli_text.py
@@ -1,6 +1,7 @@
 import os
 import shutil
 import tempfile
+import platform
 import pandas as pd
 import pytest
 import logging
@@ -49,6 +50,7 @@ def teardown_module():
     shutil.rmtree(tmp_dir)
 
 
+@pytest.mark.skipif((platform.system() == "Darwin") and (platform.machine() in ('arm', 'armv7l', 'aarch64', 'arm64', 'ARM64')), reason='173169')
 def test_text_target_model():
     run_wwb([
         "--base-model",