From 61e65b5e1e2a9f0fb7f190c663138ce68bba0b28 Mon Sep 17 00:00:00 2001
From: vb
Date: Thu, 4 Sep 2025 16:29:25 +0200
Subject: [PATCH] remove vision2seq vs image-text-to-text

ref: https://huggingface.slack.com/archives/C070Q9GGGGY/p1756985150983899
---
 src/transformers/models/auto/modeling_auto.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py
index 5c0f8b9eff0b..959c70b1bd48 100644
--- a/src/transformers/models/auto/modeling_auto.py
+++ b/src/transformers/models/auto/modeling_auto.py
@@ -966,8 +966,6 @@ class _BaseModelWithGenerate(PreTrainedModel, GenerationMixin):
         ("ovis2", "Ovis2ForConditionalGeneration"),
         ("paligemma", "PaliGemmaForConditionalGeneration"),
         ("pix2struct", "Pix2StructForConditionalGeneration"),
-        ("qwen2_5_vl", "Qwen2_5_VLForConditionalGeneration"),
-        ("qwen2_vl", "Qwen2VLForConditionalGeneration"),
         ("video_llava", "VideoLlavaForConditionalGeneration"),
         ("vipllava", "VipLlavaForConditionalGeneration"),
         ("vision-encoder-decoder", "VisionEncoderDecoderModel"),