File tree Expand file tree Collapse file tree 2 files changed +24
-6
lines changed Expand file tree Collapse file tree 2 files changed +24
-6
lines changed Original file line number Diff line number Diff line change @@ -265,6 +265,10 @@ impl Idefics3 {
265
265
pub fn get_max_longest_edge_for_image_resize ( & self ) -> usize {
266
266
1456
267
267
}
268
+
269
+ pub fn get_max_image_size ( & self ) -> usize {
270
+ 4096
271
+ }
268
272
}
269
273
270
274
#[ derive( Clone , Debug , Serialize , Deserialize ) ]
Original file line number Diff line number Diff line change @@ -646,11 +646,10 @@ fn image_tokens(
646
646
const GLOBAL_IMG : & str = "<global-img>" ;
647
647
648
648
let max_longest_edge_for_image_resize = config. get_max_longest_edge_for_image_resize ( ) ;
649
+ let max_image_size = config. get_max_image_size ( ) ;
649
650
650
- // resize image if it is larger than max_longest_edge_for_image_resize keeping aspect ratio
651
- let ( height, width) = if height > max_longest_edge_for_image_resize
652
- || width > max_longest_edge_for_image_resize
653
- {
651
+ // resize image to max_longest_edge_for_image_resize and keep aspect ratio
652
+ let ( height, width) = {
654
653
let aspect_ratio = height as f32 / width as f32 ;
655
654
if height > width {
656
655
(
@@ -663,8 +662,23 @@ fn image_tokens(
663
662
max_longest_edge_for_image_resize,
664
663
)
665
664
}
666
- } else {
667
- ( height, width)
665
+ } ;
666
+
667
+ let ( height, width) = {
668
+ let aspect_ratio = height as f32 / width as f32 ;
669
+ if height >= width && height > max_image_size {
670
+ (
671
+ max_image_size,
672
+ ( max_image_size as f32 / aspect_ratio) as usize ,
673
+ )
674
+ } else if width > height && width > max_image_size {
675
+ (
676
+ ( max_image_size as f32 * aspect_ratio) as usize ,
677
+ max_image_size,
678
+ )
679
+ } else {
680
+ ( height, width)
681
+ }
668
682
} ;
669
683
670
684
let image_seq_len = config. get_number_of_features ( ) ;
You can’t perform that action at this time.
0 commit comments