Skip to content

Commit 841f3d4

Browse files
committed
Merge remote-tracking branch 'upstream/main' into mvafin/bitnet_support
2 parents 31981ea + 3c8e4ba commit 841f3d4

File tree

10 files changed

+103
-84
lines changed

10 files changed

+103
-84
lines changed

.github/workflows/build_documentation.yml

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ on:
88
- doc-builder*
99
- v*-release
1010

11+
env:
12+
UV_SYSTEM_PYTHON: 1
13+
UV_TORCH_BACKEND: auto
14+
1115
jobs:
1216
build_documentation:
1317
runs-on: ubuntu-22.04
@@ -21,13 +25,13 @@ jobs:
2125
- uses: actions/checkout@v4
2226
- uses: actions/setup-node@v4
2327
with:
24-
node-version: '18'
28+
node-version: "18"
2529
cache-dependency-path: "kit/package-lock.json"
2630

2731
- name: Set up Python
2832
uses: actions/setup-python@v4
2933
with:
30-
python-version: '3.11'
34+
python-version: "3.11"
3135

3236
- name: Set environment variables
3337
run: |
@@ -45,11 +49,9 @@ jobs:
4549
4650
- name: Setup environment
4751
run: |
48-
python -m pip install --upgrade pip
49-
python -m pip install --upgrade setuptools
50-
python -m pip install git+https://github.com/huggingface/doc-builder
51-
python -m pip install .[quality]
52-
python -m pip install openvino nncf neural-compressor[pt] diffusers accelerate
52+
pip install --upgrade pip uv
53+
uv pip install git+https://github.com/huggingface/doc-builder
54+
# Requirement specifiers are quoted: unquoted, the shell parses `>3.4` as an
# stdout redirect to a file named "3.4" and the version constraint is lost.
uv pip install ".[quality]" nncf openvino "neural-compressor[pt]>3.4" diffusers accelerate
5355
5456
- name: Make documentation
5557
shell: bash

.github/workflows/build_pr_documentation.yml

Lines changed: 22 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,13 @@ concurrency:
99
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
1010
cancel-in-progress: true
1111

12+
env:
13+
UV_SYSTEM_PYTHON: 1
14+
UV_TORCH_BACKEND: auto
15+
1216
jobs:
1317
build_documentation:
1418
runs-on: ubuntu-22.04
15-
1619
env:
1720
COMMIT_SHA: ${{ github.event.pull_request.head.sha }}
1821
PR_NUMBER: ${{ github.event.number }}
@@ -21,42 +24,34 @@ jobs:
2124

2225
steps:
2326
- uses: actions/checkout@v4
27+
- uses: actions/setup-node@v4
2428
with:
25-
repository: "huggingface/doc-builder"
26-
path: doc-builder
29+
node-version: "18"
30+
cache-dependency-path: "kit/package-lock.json"
2731

28-
- uses: actions/checkout@v4
32+
- name: Set up Python
33+
uses: actions/setup-python@v4
2934
with:
30-
repository: "huggingface/optimum-intel"
31-
path: optimum-intel
32-
33-
- name: Setup Python
34-
uses: actions/setup-python@v5
35-
with:
36-
python-version: 3.9
35+
python-version: "3.11"
3736

3837
- name: Setup environment
3938
run: |
40-
pip install --upgrade pip
41-
pip uninstall -y doc-builder
42-
cd doc-builder
43-
git pull origin main
44-
pip install .
45-
pip install black
46-
cd ..
39+
pip install --upgrade pip uv
40+
uv pip install git+https://github.com/huggingface/doc-builder
41+
# Requirement specifiers are quoted: unquoted, the shell parses `>3.4` as an
# stdout redirect to a file named "3.4" and the version constraint is lost.
uv pip install ".[quality]" nncf openvino "neural-compressor[pt]>3.4" diffusers accelerate
4742
4843
- name: Make documentation
44+
shell: bash
4945
run: |
50-
cd optimum-intel
51-
make doc BUILD_DIR=intel-doc-build VERSION=pr_$PR_NUMBER COMMIT_SHA_SUBPACKAGE=$COMMIT_SHA CLONE_URL=$PR_CLONE_URL
52-
cd ..
53-
54-
- name: Save commit_sha & pr_number
55-
run: |
56-
cd optimum-intel
57-
sudo chmod -R ugo+rwx intel-doc-build
46+
doc-builder build optimum.intel docs/source/ \
47+
--repo_name optimum-intel \
48+
--build_dir intel-doc-build/ \
49+
--version pr_${{ env.PR_NUMBER }} \
50+
--version_tag_suffix "" \
51+
--html \
52+
--clean
5853
cd intel-doc-build
59-
sudo mv optimum.intel optimum-intel
54+
mv optimum.intel optimum-intel
6055
echo ${{ env.COMMIT_SHA }} > ./commit_sha
6156
echo ${{ env.PR_NUMBER }} > ./pr_number
6257

Makefile

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,12 @@ doc: build_doc_docker_image
5959
--version_tag_suffix "" \
6060
--html \
6161
--clean
62+
63+
# Remove build artefacts and tool caches.
# Declared phony so that a file or directory named `clean` can never make the
# target look up to date. __pycache__ directories live inside every package,
# so they are searched for recursively instead of via a top-level glob.
.PHONY: clean
clean:
	rm -rf build dist optimum_intel.egg-info
	rm -rf .pytest_cache .ruff_cache .mypy_cache
	find . -type d -name __pycache__ -prune -exec rm -rf {} +

docs/Dockerfile

Lines changed: 0 additions & 28 deletions
This file was deleted.

optimum/exporters/openvino/model_configs.py

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,6 @@ def init_model_configs():
164164
"transformers",
165165
"AutoModelForImageTextToText",
166166
)
167-
168167
TasksManager._CUSTOM_CLASSES[("pt", "llava_next_video", "image-text-to-text")] = (
169168
"transformers",
170169
"AutoModelForVision2Seq",
@@ -300,21 +299,46 @@ def patch_model_for_export(
300299
return Qwen2MoEPatcher(self, model, model_kwargs=model_kwargs)
301300

302301

303-
@register_in_tasks_manager("qwen3", *["text-generation", "text-generation-with-past"], library_name="transformers")
302+
@register_in_tasks_manager(
303+
"qwen3",
304+
*[
305+
"text-generation",
306+
"text-generation-with-past",
307+
"feature-extraction",
308+
"feature-extraction-with-past",
309+
"text-classification",
310+
],
311+
library_name="transformers",
312+
)
304313
class Qwen3OpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
305314
MIN_TRANSFORMERS_VERSION = "4.51.0"
306315

307316
DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, GemmaDummyPastKeyValuesGenerator)
308317
DUMMY_PKV_GENERATOR_CLASS = GemmaDummyPastKeyValuesGenerator
309318
NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
310319

320+
@property
def inputs(self) -> Dict[str, Dict[int, str]]:
    """Return the dynamic-axis description of the inputs for the current task.

    For the plain ``"feature-extraction"`` task only ``input_ids`` and
    ``attention_mask`` are declared, each with a dynamic batch axis (0) and
    sequence axis (1). Every other task defers to the parent class definition.
    """
    if self.task != "feature-extraction":
        return super().inputs

    return {
        name: {0: "batch_size", 1: "sequence_length"}
        for name in ("input_ids", "attention_mask")
    }
330+
311331
def patch_model_for_export(
312332
self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
313333
) -> "ModelPatcher":
314334
return OVDecoderModelPatcher(self, model, model_kwargs=model_kwargs)
315335

316336

317-
@register_in_tasks_manager("qwen3_moe", *["text-generation", "text-generation-with-past"], library_name="transformers")
337+
@register_in_tasks_manager(
338+
"qwen3_moe",
339+
*["text-generation", "text-generation-with-past", "feature-extraction", "feature-extraction-with-past"],
340+
library_name="transformers",
341+
)
318342
class Qwen3MoEOpenVINOConfig(Qwen3OpenVINOConfig):
319343
def patch_model_for_export(
320344
self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
@@ -3501,7 +3525,11 @@ class Qwen2VLConfigBehavior(str, enum.Enum):
35013525
TEXT_EMBEDDINGS = "text_embeddings"
35023526

35033527

3504-
@register_in_tasks_manager("qwen2_vl", *["image-text-to-text", "video-text-to-text"], library_name="transformers")
3528+
@register_in_tasks_manager(
3529+
"qwen2_vl",
3530+
*["image-text-to-text", "video-text-to-text"],
3531+
library_name="transformers",
3532+
)
35053533
class Qwen2VLOpenVINOConfig(BaseVLMOpenVINOConfig):
35063534
SUPPORTED_BEHAVIORS = [model_type.value for model_type in Qwen2VLConfigBehavior]
35073535
NORMALIZED_CONFIG_CLASS = NormalizedVisionConfig
@@ -3634,7 +3662,11 @@ def outputs(self) -> Dict[str, Dict[int, str]]:
36343662
return {}
36353663

36363664

3637-
@register_in_tasks_manager("qwen2_5_vl", *["image-text-to-text", "video-text-to-text"], library_name="transformers")
3665+
@register_in_tasks_manager(
3666+
"qwen2_5_vl",
3667+
*["image-text-to-text", "video-text-to-text"],
3668+
library_name="transformers",
3669+
)
36383670
class Qwen2_5_VLOpenVINOConfig(Qwen2VLOpenVINOConfig):
36393671
MIN_TRANSFORMERS_VERSION = version.parse("4.49.0")
36403672

optimum/exporters/openvino/model_patcher.py

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -356,14 +356,10 @@ def __enter__(self):
356356
# Although I'm not sure this is the right way to handle this, we are basically pretending that -65,504 is -inf
357357
ALL_MASK_ATTENTION_FUNCTIONS.register("eager", eager_mask_without_vmap)
358358

359-
# for non-stateful decoder models, we use eager mask without vmap for sdpa as well
360-
# to avoid a nan output issue in OpenVINO that only happens in case of non-stateful models
361-
if not getattr(self.real_config, "stateful", False):
362-
logger.warning(
363-
"Exporting a non-stateful decoder model currently results in a nan output in OpenVINO. "
364-
"There might be a performance impact due to the use of eager mask (floats) instead of sdpa mask (bools). "
365-
)
366-
ALL_MASK_ATTENTION_FUNCTIONS.register("sdpa", eager_mask_without_vmap)
359+
# for decoder models, we use eager mask without vmap for sdpa as well
360+
# to avoid a nan output issue in OpenVINO that only happens in case of:
361+
# non-stateful models on cpu and stateful models on npu
362+
ALL_MASK_ATTENTION_FUNCTIONS.register("sdpa", eager_mask_without_vmap)
367363

368364
def __exit__(self, exc_type, exc_value, traceback):
369365
super().__exit__(exc_type, exc_value, traceback)
@@ -4771,14 +4767,10 @@ def __enter__(self):
47714767
# Although I'm not sure this is the right way to handle this, we are basically pretending that -65,504 is -inf
47724768
ALL_MASK_ATTENTION_FUNCTIONS.register("eager", eager_mask_without_vmap)
47734769

4774-
# for non-stateful decoder models, we use eager mask without vmap for sdpa as well
4775-
# to avoid a nan output issue in OpenVINO that only happens in case of non-stateful models
4776-
if not getattr(self.real_config, "stateful", False):
4777-
logger.warning(
4778-
"Exporting a non-stateful decoder model currently results in a nan output in OpenVINO. "
4779-
"There might be a performance impact due to the use of eager mask (floats) instead of sdpa mask (bools). "
4780-
)
4781-
ALL_MASK_ATTENTION_FUNCTIONS.register("sdpa", eager_mask_without_vmap)
4770+
# for decoder models, we use eager mask without vmap for sdpa as well
4771+
# to avoid a nan output issue in OpenVINO that only happens in case of:
4772+
# non-stateful models on cpu and stateful models on npu
4773+
ALL_MASK_ATTENTION_FUNCTIONS.register("sdpa", eager_mask_without_vmap)
47824774

47834775
def __exit__(self, exc_type, exc_value, traceback):
47844776
super().__exit__(exc_type, exc_value, traceback)

optimum/intel/openvino/modeling_diffusion.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,11 @@
122122
SanaSprintPipeline = object
123123

124124

125+
if is_diffusers_version(">=", "0.35.0"):
126+
from diffusers.models.cache_utils import CacheMixin
127+
else:
128+
CacheMixin = object
129+
125130
DIFFUSION_MODEL_TRANSFORMER_SUBFOLDER = "transformer"
126131
DIFFUSION_MODEL_TEXT_ENCODER_3_SUBFOLDER = "text_encoder_3"
127132

@@ -1072,7 +1077,7 @@ def __call__(self, *args, **kwargs):
10721077
return self.auto_model_class.__call__(self, *args, **kwargs)
10731078

10741079

1075-
class OVPipelinePart(ConfigMixin):
1080+
class OVPipelinePart(ConfigMixin, CacheMixin):
10761081
config_name: str = CONFIG_NAME
10771082

10781083
def __init__(
@@ -1161,6 +1166,11 @@ def __call__(self, *args, **kwargs):
11611166
def modules(self):
11621167
return []
11631168

1169+
def named_modules(self):
    """Yield nothing.

    Starting from diffusers 0.35.0 some model parts inherit from `CacheMixin`, which uses the
    `named_modules` method to register hooks for attention caching. That mechanism can't be used
    with OpenVINO, so this generator is intentionally empty.
    """
    yield from ()
1173+
11641174

11651175
class OVModelTextEncoder(OVPipelinePart):
11661176
def __init__(self, model: openvino.Model, parent_pipeline: OVDiffusionPipeline, model_name: str = ""):

tests/openvino/test_export.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,9 @@ class ExportModelTest(unittest.TestCase):
9898
{"stable-diffusion-3": OVStableDiffusion3Pipeline, "flux": OVFluxPipeline, "ltx-video": OVLTXPipeline}
9999
)
100100

101+
if is_transformers_version(">=", "4.51"):
102+
SUPPORTED_ARCHITECTURES.update({"qwen3": OVModelForFeatureExtraction})
103+
101104
if is_transformers_version(">=", "4.54"):
102105
SUPPORTED_ARCHITECTURES.update({"ernie4_5": OVModelForCausalLM})
103106

tests/openvino/test_exporters_cli.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ class OVCLIExportTestCase(unittest.TestCase):
146146
"mamba": 2,
147147
"falcon-mamba": 2,
148148
"ernie4_5": 2,
149+
"qwen3": 2,
149150
}
150151

151152
TOKENIZER_CHAT_TEMPLATE_TESTS_MODELS = {

tests/openvino/test_modeling.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1028,6 +1028,9 @@ class OVModelForFeatureExtractionIntegrationTest(unittest.TestCase):
10281028
"sentence-transformers-bert",
10291029
)
10301030

1031+
if is_transformers_version(">=", "4.51.0"):
1032+
SUPPORTED_ARCHITECTURES += ("qwen3",)
1033+
10311034
@parameterized.expand(SUPPORTED_ARCHITECTURES)
10321035
def test_compare_to_transformers(self, model_arch):
10331036
model_id = MODEL_NAMES[model_arch]

0 commit comments

Comments
 (0)