Skip to content

Commit 42e7a34

Browse files
committed
Modify hardcoded video_len check that fails with increased number of tests
1 parent f18b738 commit 42e7a34

File tree

3 files changed

+27
-7
lines changed

3 files changed

+27
-7
lines changed

tests/models/qwen2_5_omni/test_processor_qwen2_5_omni.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -422,8 +422,15 @@ def _test_apply_chat_template(
422422
self.assertEqual(len(out_dict["input_ids"]), batch_size)
423423
self.assertEqual(len(out_dict["attention_mask"]), batch_size)
424424

425-
video_len = 2880 if batch_size == 1 else 5808 # qwen pixels don't scale with bs same way as other models
426-
mm_len = batch_size * 1564 if modality == "image" else video_len
425+
if modality == "video":
426+
# qwen pixels don't scale with bs same way as other models, calulate expected video token count based on video_grid_thw
427+
# TODO: update expected video token count calculation based on the internal processing logic of Qwen2_5OmniProcessor
428+
expected_video_token_count = 0
429+
for thw in out_dict["video_grid_thw"]:
430+
expected_video_token_count += thw[0] * thw[1] * thw[2]
431+
mm_len = expected_video_token_count
432+
else:
433+
mm_len = batch_size * 1564
427434
self.assertEqual(len(out_dict[input_name]), mm_len)
428435

429436
return_tensor_to_type = {"pt": torch.Tensor, "np": np.ndarray, None: list}

tests/models/qwen2_5_vl/test_processor_qwen2_5_vl.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,8 +239,15 @@ def _test_apply_chat_template(
239239
self.assertEqual(len(out_dict["input_ids"]), batch_size)
240240
self.assertEqual(len(out_dict["attention_mask"]), batch_size)
241241

242-
video_len = 180 if batch_size == 1 else 320 # qwen pixels don't scale with bs same way as other models
243-
mm_len = batch_size * 192 if modality == "image" else video_len
242+
if modality == "video":
243+
# qwen pixels don't scale with bs same way as other models, calulate expected video token count based on video_grid_thw
244+
# TODO: update expected video token count calculation based on the internal processing logic of Qwen2_5VLProcessor
245+
expected_video_token_count = 0
246+
for thw in out_dict["video_grid_thw"]:
247+
expected_video_token_count += thw[0] * thw[1] * thw[2]
248+
mm_len = expected_video_token_count
249+
else:
250+
mm_len = batch_size * 192
244251
self.assertEqual(len(out_dict[input_name]), mm_len)
245252

246253
return_tensor_to_type = {"pt": torch.Tensor, "np": np.ndarray, None: list}

tests/models/qwen2_vl/test_processor_qwen2_vl.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -239,9 +239,15 @@ def _test_apply_chat_template(
239239
self.assertTrue(input_name in out_dict)
240240
self.assertEqual(len(out_dict["input_ids"]), batch_size)
241241
self.assertEqual(len(out_dict["attention_mask"]), batch_size)
242-
243-
video_len = 180 if batch_size == 1 else 320 # qwen pixels don't scale with bs same way as other models
244-
mm_len = batch_size * 192 if modality == "image" else video_len
242+
if modality == "video":
243+
# qwen pixels don't scale with bs same way as other models, calulate expected video token count based on video_grid_thw
244+
# TODO: update expected video token count calculation based on the internal processing logic of Qwen2VLProcessor
245+
expected_video_token_count = 0
246+
for thw in out_dict["video_grid_thw"]:
247+
expected_video_token_count += thw[0] * thw[1] * thw[2]
248+
mm_len = expected_video_token_count
249+
else:
250+
mm_len = batch_size * 192
245251
self.assertEqual(len(out_dict[input_name]), mm_len)
246252

247253
return_tensor_to_type = {"pt": torch.Tensor, "np": np.ndarray, None: list}

0 commit comments

Comments
 (0)