Skip to content

Commit 0f4671a

Browse files
committed
Revive create_tiles implementation and all related functions
1 parent cdeb9ee commit 0f4671a

File tree

1 file changed

+345
-3
lines changed

1 file changed

+345
-3
lines changed

supervision/utils/image.py

Lines changed: 345 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
from __future__ import annotations
22

3+
import itertools
4+
import math
35
import os
46
import shutil
5-
from typing import Any, Literal
7+
from collections.abc import Callable
8+
from functools import partial
9+
from typing import Literal
610

711
import cv2
812
import numpy as np
@@ -12,10 +16,18 @@
1216

1317
from supervision.annotators.base import ImageType
1418
from supervision.draw.color import Color, unify_to_bgr
19+
from supervision.draw.utils import calculate_optimal_text_scale, draw_text
1520
from supervision.geometry.core import Point
1621
from supervision.utils.conversion import (
22+
cv2_to_pillow,
1723
ensure_cv2_image_for_standalone_function,
24+
images_to_cv2,
1825
)
26+
from supervision.utils.iterables import create_batches, fill
27+
28+
RelativePosition = Literal["top", "bottom"]
29+
30+
MAX_COLUMNS_FOR_SINGLE_ROW_GRID = 3
1931

2032

2133
def crop_image(
@@ -654,6 +666,336 @@ def create_tiles(
654666
titles_padding: int = 10,
655667
titles_text_font: int = cv2.FONT_HERSHEY_SIMPLEX,
656668
titles_background_color: tuple[int, int, int] | Color = Color.from_hex("#D9D9D9"),
657-
default_title_placement: Any = "top",
669+
default_title_placement: RelativePosition = "top",
658670
) -> ImageType:
659-
return None
671+
"""
672+
Creates tiles mosaic from input images, automating grid placement and
673+
converting images to common resolution maintaining aspect ratio. It is
674+
also possible to render text titles on tiles, using optional set of
675+
parameters specifying text drawing (see parameters description).
676+
677+
Automated grid placement will try to maintain square shape of grid
678+
(with size being the nearest integer square root of #images), up to two exceptions:
679+
* if there are up to 3 images - images will be displayed in single row
680+
* if square-grid placement causes last row to be empty - number of rows is trimmed
681+
until last row has at least one image
682+
683+
Args:
684+
images (List[ImageType]): Images to create tiles. Elements can be either
685+
np.ndarray or PIL.Image, common representation will be agreed by the
686+
function.
687+
grid_size (Optional[Tuple[Optional[int], Optional[int]]]): Expected grid
688+
size in format (n_rows, n_cols). If not given - automated grid placement
689+
will be applied. One may also provide only one out of two elements of the
690+
tuple - then grid will be created with either n_rows or n_cols fixed,
691+
leaving the other dimension to be adjusted by the number of images
692+
single_tile_size (Optional[Tuple[int, int]]): size of a single tile element
693+
provided in (width, height) format. If not given - size of tile will be
694+
automatically calculated based on `tile_scaling` parameter.
695+
tile_scaling (Literal["min", "max", "avg"]): If `single_tile_size` is not
696+
given - parameter will be used to calculate tile size - using
697+
min / max / avg size of image provided in `images` list.
698+
tile_padding_color (Union[Tuple[int, int, int], sv.Color]): Color to be used in
699+
images letterbox procedure (while standardising tiles sizes) as a padding.
700+
If tuple provided - should be BGR.
701+
tile_margin (int): size of margin between tiles (in pixels)
702+
tile_margin_color (Union[Tuple[int, int, int], sv.Color]): Color of tile margin.
703+
If tuple provided - should be BGR.
704+
return_type (Literal["auto", "cv2", "pillow"]): Parameter dictates the format of
705+
return image. One may choose specific type ("cv2" or "pillow") to enforce
706+
conversion. "auto" mode takes a majority vote between types of elements in
707+
`images` list - resolving draws in favour of OpenCV format. "auto" can be
708+
safely used when all input images are of the same type.
709+
titles (Optional[List[Optional[str]]]): Optional titles to be added to tiles.
710+
Elements of that list may be empty - then specific tile (in order presented
711+
in `images` parameter) will not be filled with title. It is possible to
712+
provide list of titles shorter than `images` - then remaining titles will
713+
be assumed empty.
714+
titles_anchors (Optional[Union[Point, List[Optional[Point]]]]): Parameter to
715+
specify anchor points for titles. It is possible to specify anchor either
716+
globally or for specific tiles (following order of `images`).
717+
If not given (either globally, or for specific element of the list),
718+
it will be calculated automatically based on `default_title_placement`.
719+
titles_color (Union[Tuple[int, int, int], Color]): Color of titles text.
720+
If tuple provided - should be BGR.
721+
titles_scale (Optional[float]): Scale of titles. If not provided - value will
722+
be calculated using `calculate_optimal_text_scale(...)`.
723+
titles_thickness (int): Thickness of titles text.
724+
titles_padding (int): Size of titles padding.
725+
titles_text_font (int): Font to be used to render titles. Must be integer
726+
constant representing OpenCV font.
727+
(See docs: https://docs.opencv.org/4.x/d6/d6e/group__imgproc__draw.html)
728+
titles_background_color (Union[Tuple[int, int, int], Color]): Color of title
729+
text padding.
730+
default_title_placement (Literal["top", "bottom"]): Parameter specifies title
731+
anchor placement in case if explicit anchor is not provided.
732+
733+
Returns:
734+
ImageType: Image with all input images located in tiles grid. The output type is
735+
determined by `return_type` parameter.
736+
737+
Raises:
738+
ValueError: In case when input images list is empty, provided `grid_size` is too
739+
small to fit all images, `tile_scaling` mode is invalid.
740+
"""
741+
if len(images) == 0:
742+
raise ValueError("Could not create image tiles from empty list of images.")
743+
if return_type == "auto":
744+
return_type = _negotiate_tiles_format(images=images)
745+
tile_padding_color = unify_to_bgr(color=tile_padding_color)
746+
tile_margin_color = unify_to_bgr(color=tile_margin_color)
747+
images = images_to_cv2(images=images)
748+
if single_tile_size is None:
749+
single_tile_size = _aggregate_images_shape(images=images, mode=tile_scaling)
750+
resized_images = [
751+
letterbox_image(
752+
image=i, resolution_wh=single_tile_size, color=tile_padding_color
753+
)
754+
for i in images
755+
]
756+
grid_size = _establish_grid_size(images=images, grid_size=grid_size)
757+
if len(images) > grid_size[0] * grid_size[1]:
758+
raise ValueError(
759+
f"Could not place {len(images)} in grid with size: {grid_size}."
760+
)
761+
if titles is not None:
762+
titles = fill(sequence=titles, desired_size=len(images), content=None)
763+
titles_anchors = (
764+
[titles_anchors]
765+
if not issubclass(type(titles_anchors), list)
766+
else titles_anchors
767+
)
768+
titles_anchors = fill(
769+
sequence=titles_anchors, desired_size=len(images), content=None
770+
)
771+
titles_color = unify_to_bgr(color=titles_color)
772+
titles_background_color = unify_to_bgr(color=titles_background_color)
773+
tiles = _generate_tiles(
774+
images=resized_images,
775+
grid_size=grid_size,
776+
single_tile_size=single_tile_size,
777+
tile_padding_color=tile_padding_color,
778+
tile_margin=tile_margin,
779+
tile_margin_color=tile_margin_color,
780+
titles=titles,
781+
titles_anchors=titles_anchors,
782+
titles_color=titles_color,
783+
titles_scale=titles_scale,
784+
titles_thickness=titles_thickness,
785+
titles_padding=titles_padding,
786+
titles_text_font=titles_text_font,
787+
titles_background_color=titles_background_color,
788+
default_title_placement=default_title_placement,
789+
)
790+
if return_type == "pillow":
791+
tiles = cv2_to_pillow(image=tiles)
792+
return tiles
793+
794+
795+
def _negotiate_tiles_format(images: list[ImageType]) -> Literal["cv2", "pillow"]:
796+
number_of_np_arrays = sum(issubclass(type(i), np.ndarray) for i in images)
797+
if number_of_np_arrays >= (len(images) // 2):
798+
return "cv2"
799+
return "pillow"
800+
801+
802+
def _calculate_aggregated_images_shape(
803+
images: list[np.ndarray], aggregator: Callable[[list[int]], float]
804+
) -> tuple[int, int]:
805+
height = round(aggregator([i.shape[0] for i in images]))
806+
width = round(aggregator([i.shape[1] for i in images]))
807+
return width, height
808+
809+
810+
# Lookup table from aggregation mode name to a shape aggregator with the
# matching numpy reduction already bound as `aggregator`.
SHAPE_AGGREGATION_FUN = {
    mode: partial(_calculate_aggregated_images_shape, aggregator=reducer)
    for mode, reducer in (("min", np.min), ("max", np.max), ("avg", np.average))
}
815+
816+
817+
def _aggregate_images_shape(
    images: list[np.ndarray], mode: Literal["min", "max", "avg"]
) -> tuple[int, int]:
    """
    Compute a common image size using the aggregation registered for `mode`.

    Args:
        images (List[np.ndarray]): Images passed to the selected aggregator.
        mode (Literal["min", "max", "avg"]): Key into `SHAPE_AGGREGATION_FUN`.

    Returns:
        Tuple[int, int]: Whatever the selected aggregator returns for `images`.

    Raises:
        ValueError: If `mode` is not a key of `SHAPE_AGGREGATION_FUN`.
    """
    aggregate = SHAPE_AGGREGATION_FUN.get(mode)
    if aggregate is None:
        raise ValueError(
            f"Could not aggregate images shape - provided unknown mode: {mode}. "
            f"Supported modes: {list(SHAPE_AGGREGATION_FUN.keys())}."
        )
    return aggregate(images)
826+
827+
828+
def _establish_grid_size(
829+
images: list[np.ndarray], grid_size: tuple[int | None, int | None] | None
830+
) -> tuple[int, int]:
831+
if grid_size is None or all(e is None for e in grid_size):
832+
return _negotiate_grid_size(images=images)
833+
if grid_size[0] is None:
834+
return math.ceil(len(images) / grid_size[1]), grid_size[1]
835+
if grid_size[1] is None:
836+
return grid_size[0], math.ceil(len(images) / grid_size[0])
837+
return grid_size
838+
839+
840+
def _negotiate_grid_size(images: list[np.ndarray]) -> tuple[int, int]:
    """
    Choose a grid automatically from the number of images.

    Up to `MAX_COLUMNS_FOR_SINGLE_ROW_GRID` images are laid out in a single
    row. Otherwise the grid starts as a square whose side is the ceiling of
    the square root of the image count, and rows are removed while the
    remaining rows can still hold every image.

    Args:
        images (List[np.ndarray]): Images to be placed; only their count is used.

    Returns:
        Tuple[int, int]: Grid size in (n_rows, n_cols) format.
    """
    images_count = len(images)
    if images_count <= MAX_COLUMNS_FOR_SINGLE_ROW_GRID:
        return 1, images_count
    columns = math.ceil(np.sqrt(images_count))
    rows = columns
    # Trim trailing rows that would otherwise stay completely empty.
    while columns * (rows - 1) >= images_count:
        rows -= 1
    return rows, columns
849+
850+
851+
def _generate_tiles(
    images: list[np.ndarray],
    grid_size: tuple[int, int],
    single_tile_size: tuple[int, int],
    tile_padding_color: tuple[int, int, int],
    tile_margin: int,
    tile_margin_color: tuple[int, int, int],
    titles: list[str | None] | None,
    titles_anchors: list[Point | None],
    titles_color: tuple[int, int, int],
    titles_scale: float | None,
    titles_thickness: int,
    titles_padding: int,
    titles_text_font: int,
    titles_background_color: tuple[int, int, int],
    default_title_placement: RelativePosition,
) -> np.ndarray:
    """
    Draw titles on the images, arrange them into grid rows and merge the rows.

    The images are first passed through `_draw_texts`, then batched with
    `create_batches` using the number of columns as the batch size. The last
    batch is topped up with plain `tile_padding_color` tiles until it has
    `columns` elements, and rows made only of such tiles are appended until
    there are `rows` rows. The result is handed to `_merge_tiles_elements`.

    Args:
        images (List[np.ndarray]): Images to place in the grid.
        grid_size (Tuple[int, int]): Grid size in (n_rows, n_cols) format.
        single_tile_size (Tuple[int, int]): Size passed as `shape` to
            `_generate_color_image` for filler tiles and forwarded to
            `_merge_tiles_elements`.
        tile_padding_color (Tuple[int, int, int]): Color of filler tiles.
        tile_margin (int): Forwarded to `_merge_tiles_elements`.
        tile_margin_color (Tuple[int, int, int]): Forwarded to
            `_merge_tiles_elements`.
        titles (Optional[List[Optional[str]]]): Forwarded to `_draw_texts`.
        titles_anchors (List[Optional[Point]]): Forwarded to `_draw_texts`.
        titles_color (Tuple[int, int, int]): Forwarded to `_draw_texts`.
        titles_scale (Optional[float]): Forwarded to `_draw_texts`.
        titles_thickness (int): Forwarded to `_draw_texts`.
        titles_padding (int): Forwarded to `_draw_texts`.
        titles_text_font (int): Forwarded to `_draw_texts`.
        titles_background_color (Tuple[int, int, int]): Forwarded to
            `_draw_texts`.
        default_title_placement (RelativePosition): Forwarded to `_draw_texts`.

    Returns:
        np.ndarray: The value returned by `_merge_tiles_elements`.
    """
    titled_images = _draw_texts(
        images=images,
        titles=titles,
        titles_anchors=titles_anchors,
        titles_color=titles_color,
        titles_scale=titles_scale,
        titles_thickness=titles_thickness,
        titles_padding=titles_padding,
        titles_text_font=titles_text_font,
        titles_background_color=titles_background_color,
        default_title_placement=default_title_placement,
    )
    rows, columns = grid_size
    grid = list(create_batches(sequence=titled_images, batch_size=columns))

    # Top up the (possibly partial) last row with blank tiles.
    last_row = grid[-1]
    for _ in range(columns - len(last_row)):
        last_row.append(
            _generate_color_image(shape=single_tile_size, color=tile_padding_color)
        )

    # Append fully blank rows until the requested number of rows is reached.
    for _ in range(rows - len(grid)):
        blank_tile = _generate_color_image(
            shape=single_tile_size, color=tile_padding_color
        )
        grid.append([blank_tile] * columns)

    return _merge_tiles_elements(
        tiles_elements=grid,
        grid_size=grid_size,
        single_tile_size=single_tile_size,
        tile_margin=tile_margin,
        tile_margin_color=tile_margin_color,
    )
898+
899+
900+
def _draw_texts(
    images: list[np.ndarray],
    titles: list[str | None] | None,
    titles_anchors: list[Point | None],
    titles_color: tuple[int, int, int],
    titles_scale: float | None,
    titles_thickness: int,
    titles_padding: int,
    titles_text_font: int,
    titles_background_color: tuple[int, int, int],
    default_title_placement: RelativePosition,
) -> list[np.ndarray]:
    """
    Render the given titles onto the corresponding images.

    Args:
        images (List[np.ndarray]): Images to annotate.
        titles (Optional[List[Optional[str]]]): Titles paired with `images` by
            position. `None` as a whole returns `images` untouched; a `None`
            element leaves that image as it is.
        titles_anchors (List[Optional[Point]]): Anchors paired with `images`
            by position; missing ones are filled in by
            `_prepare_default_titles_anchors`.
        titles_color (Tuple[int, int, int]): Text color, converted with
            `Color.from_bgr_tuple`.
        titles_scale (Optional[float]): Text scale. When `None`, it is obtained
            from `calculate_optimal_text_scale` using the size of the first
            image.
        titles_thickness (int): Passed to `draw_text` as `text_thickness`.
        titles_padding (int): Passed to `draw_text` as `text_padding`.
        titles_text_font (int): Passed to `draw_text` as `text_font`.
        titles_background_color (Tuple[int, int, int]): Background color,
            converted with `Color.from_bgr_tuple`.
        default_title_placement (RelativePosition): Placement used when an
            anchor is not given explicitly.

    Returns:
        List[np.ndarray]: One entry per (image, title, anchor) triple - the
            output of `draw_text` for titled images, the input image otherwise.
    """
    if titles is None:
        return images

    anchors = _prepare_default_titles_anchors(
        images=images,
        titles_anchors=titles_anchors,
        default_title_placement=default_title_placement,
    )

    scale = titles_scale
    if scale is None:
        # The scale is derived once, from the first image only.
        first_height, first_width = images[0].shape[:2]
        scale = calculate_optimal_text_scale(
            resolution_wh=(first_width, first_height)
        )

    rendered = []
    for image, title, anchor in zip(images, titles, anchors):
        if title is not None:
            image = draw_text(
                scene=image,
                text=title,
                text_anchor=anchor,
                text_color=Color.from_bgr_tuple(titles_color),
                text_scale=scale,
                text_thickness=titles_thickness,
                text_padding=titles_padding,
                text_font=titles_text_font,
                background_color=Color.from_bgr_tuple(titles_background_color),
            )
        rendered.append(image)
    return rendered
942+
943+
944+
def _prepare_default_titles_anchors(
945+
images: list[np.ndarray],
946+
titles_anchors: list[Point | None],
947+
default_title_placement: RelativePosition,
948+
) -> list[Point]:
949+
result = []
950+
for image, anchor in zip(images, titles_anchors):
951+
if anchor is not None:
952+
result.append(anchor)
953+
continue
954+
image_height, image_width = image.shape[:2]
955+
if default_title_placement == "top":
956+
default_anchor = Point(x=image_width / 2, y=image_height * 0.1)
957+
else:
958+
default_anchor = Point(x=image_width / 2, y=image_height * 0.9)
959+
result.append(default_anchor)
960+
return result
961+
962+
963+
def _merge_tiles_elements(
964+
tiles_elements: list[list[np.ndarray]],
965+
grid_size: tuple[int, int],
966+
single_tile_size: tuple[int, int],
967+
tile_margin: int,
968+
tile_margin_color: tuple[int, int, int],
969+
) -> np.ndarray:
970+
vertical_padding = (
971+
np.ones((single_tile_size[1], tile_margin, 3)) * tile_margin_color
972+
)
973+
merged_rows = [
974+
np.concatenate(
975+
list(
976+
itertools.chain.from_iterable(
977+
zip(row, [vertical_padding] * grid_size[1])
978+
)
979+
)[:-1],
980+
axis=1,
981+
)
982+
for row in tiles_elements
983+
]
984+
row_width = merged_rows[0].shape[1]
985+
horizontal_padding = (
986+
np.ones((tile_margin, row_width, 3), dtype=np.uint8) * tile_margin_color
987+
)
988+
rows_with_paddings = []
989+
for row in merged_rows:
990+
rows_with_paddings.append(row)
991+
rows_with_paddings.append(horizontal_padding)
992+
return np.concatenate(
993+
rows_with_paddings[:-1],
994+
axis=0,
995+
).astype(np.uint8)
996+
997+
998+
def _generate_color_image(
999+
shape: tuple[int, int], color: tuple[int, int, int]
1000+
) -> np.ndarray:
1001+
return np.ones((*shape[::-1], 3), dtype=np.uint8) * color

0 commit comments

Comments
 (0)