Revive create_tiles implementation and all related functions

Borda · Borda · commit 0f4671ab3aa7 · 2026-01-30T14:33:39.000+01:00
diff --git a/supervision/utils/image.py b/supervision/utils/image.py
@@ -1,8 +1,12 @@
 from __future__ import annotations
 
+import itertools
+import math
 import os
 import shutil
-from typing import Any, Literal
+from collections.abc import Callable
+from functools import partial
+from typing import Literal
 
 import cv2
 import numpy as np
@@ -12,10 +16,18 @@
 
 from supervision.annotators.base import ImageType
 from supervision.draw.color import Color, unify_to_bgr
+from supervision.draw.utils import calculate_optimal_text_scale, draw_text
 from supervision.geometry.core import Point
 from supervision.utils.conversion import (
+    cv2_to_pillow,
     ensure_cv2_image_for_standalone_function,
+    images_to_cv2,
 )
+from supervision.utils.iterables import create_batches, fill
+
+RelativePosition = Literal["top", "bottom"]
+
+MAX_COLUMNS_FOR_SINGLE_ROW_GRID = 3
 
 
 def crop_image(
@@ -654,6 +666,336 @@ def create_tiles(
     titles_padding: int = 10,
     titles_text_font: int = cv2.FONT_HERSHEY_SIMPLEX,
     titles_background_color: tuple[int, int, int] | Color = Color.from_hex("#D9D9D9"),
-    default_title_placement: Any = "top",
+    default_title_placement: RelativePosition = "top",
 ) -> ImageType:
-    return None
+    """
+    Creates tiles mosaic from input images, automating grid placement and
+    converting images to common resolution maintaining aspect ratio. It is
+    also possible to render text titles on tiles, using optional set of
+    parameters specifying text drawing (see parameters description).
+
+    Automated grid placement will try to maintain square shape of grid
+    (with size being the nearest integer square root of #images), up to two exceptions:
+    * if there are up to 3 images - images will be displayed in single row
+    * if square-grid placement causes last row to be empty - number of rows is trimmed
+        until last row has at least one image
+
+    Args:
+        images (List[ImageType]): Images to create tiles. Elements can be either
+            np.ndarray or PIL.Image, common representation will be agreed by the
+            function.
+        grid_size (Optional[Tuple[Optional[int], Optional[int]]]): Expected grid
+            size in format (n_rows, n_cols). If not given - automated grid placement
+            will be applied. One may also provide only one out of two elements of the
+            tuple - then grid will be created with either n_rows or n_cols fixed,
+            leaving the other dimension to be adjusted by the number of images
+        single_tile_size (Optional[Tuple[int, int]]): size of a single tile element
+            provided in (width, height) format. If not given - size of tile will be
+            automatically calculated based on `tile_scaling` parameter.
+        tile_scaling (Literal["min", "max", "avg"]): If `single_tile_size` is not
+            given - parameter will be used to calculate tile size - using
+            min / max / avg size of image provided in `images` list.
+        tile_padding_color (Union[Tuple[int, int, int], sv.Color]): Color to be used in
+            images letterbox procedure (while standardising tiles sizes) as a padding.
+            If tuple provided - should be BGR.
+        tile_margin (int): size of margin between tiles (in pixels)
+        tile_margin_color (Union[Tuple[int, int, int], sv.Color]): Color of tile margin.
+            If tuple provided - should be BGR.
+        return_type (Literal["auto", "cv2", "pillow"]): Parameter dictates the format of
+            return image. One may choose specific type ("cv2" or "pillow") to enforce
+            conversion. "auto" mode takes a majority vote between types of elements in
+            `images` list - resolving draws in favour of OpenCV format. "auto" can be
+            safely used when all input images are of the same type.
+        titles (Optional[List[Optional[str]]]): Optional titles to be added to tiles.
+            Elements of that list may be empty - then specific tile (in order presented
+            in `images` parameter) will not be filled with title. It is possible to
+            provide list of titles shorter than `images` - then remaining titles will
+            be assumed empty.
+        titles_anchors (Optional[Union[Point, List[Optional[Point]]]]): Parameter to
+            specify anchor points for titles. It is possible to specify anchor either
+            globally or for specific tiles (following order of `images`).
+            If not given (either globally, or for specific element of the list),
+            it will be calculated automatically based on `default_title_placement`.
+        titles_color (Union[Tuple[int, int, int], Color]): Color of titles text.
+            If tuple provided - should be BGR.
+        titles_scale (Optional[float]): Scale of titles. If not provided - value will
+            be calculated using `calculate_optimal_text_scale(...)`.
+        titles_thickness (int): Thickness of titles text.
+        titles_padding (int): Size of titles padding.
+        titles_text_font (int): Font to be used to render titles. Must be integer
+            constant representing OpenCV font.
+            (See docs: https://docs.opencv.org/4.x/d6/d6e/group__imgproc__draw.html)
+        titles_background_color (Union[Tuple[int, int, int], Color]): Color of title
+            text padding.
+        default_title_placement (Literal["top", "bottom"]): Parameter specifies title
+            anchor placement in case if explicit anchor is not provided.
+
+    Returns:
+        ImageType: Image with all input images located in tiles grid. The output type is
+            determined by `return_type` parameter.
+
+    Raises:
+        ValueError: In case when input images list is empty, provided `grid_size` is too
+            small to fit all images, or `tile_scaling` mode is invalid.
+    """
+    if len(images) == 0:
+        raise ValueError("Could not create image tiles from empty list of images.")
+    if return_type == "auto":
+        return_type = _negotiate_tiles_format(images=images)
+    tile_padding_color = unify_to_bgr(color=tile_padding_color)
+    tile_margin_color = unify_to_bgr(color=tile_margin_color)
+    images = images_to_cv2(images=images)
+    if single_tile_size is None:
+        single_tile_size = _aggregate_images_shape(images=images, mode=tile_scaling)
+    resized_images = [
+        letterbox_image(
+            image=i, resolution_wh=single_tile_size, color=tile_padding_color
+        )
+        for i in images
+    ]
+    grid_size = _establish_grid_size(images=images, grid_size=grid_size)
+    if len(images) > grid_size[0] * grid_size[1]:
+        raise ValueError(
+            f"Could not place {len(images)} in grid with size: {grid_size}."
+        )
+    if titles is not None:
+        titles = fill(sequence=titles, desired_size=len(images), content=None)
+    titles_anchors = (
+        [titles_anchors]
+        if not issubclass(type(titles_anchors), list)
+        else titles_anchors
+    )
+    titles_anchors = fill(
+        sequence=titles_anchors, desired_size=len(images), content=None
+    )
+    titles_color = unify_to_bgr(color=titles_color)
+    titles_background_color = unify_to_bgr(color=titles_background_color)
+    tiles = _generate_tiles(
+        images=resized_images,
+        grid_size=grid_size,
+        single_tile_size=single_tile_size,
+        tile_padding_color=tile_padding_color,
+        tile_margin=tile_margin,
+        tile_margin_color=tile_margin_color,
+        titles=titles,
+        titles_anchors=titles_anchors,
+        titles_color=titles_color,
+        titles_scale=titles_scale,
+        titles_thickness=titles_thickness,
+        titles_padding=titles_padding,
+        titles_text_font=titles_text_font,
+        titles_background_color=titles_background_color,
+        default_title_placement=default_title_placement,
+    )
+    if return_type == "pillow":
+        tiles = cv2_to_pillow(image=tiles)
+    return tiles
+
+
+def _negotiate_tiles_format(images: list[ImageType]) -> Literal["cv2", "pillow"]:
+    """Return the majority image type of `images`; a tie resolves to "cv2"."""
+    number_of_np_arrays = sum(isinstance(i, np.ndarray) for i in images)
+    # Doubling avoids the floor division that let a numpy minority win (1 of 3).
+    return "cv2" if 2 * number_of_np_arrays >= len(images) else "pillow"
+
+
+def _calculate_aggregated_images_shape(
+    images: list[np.ndarray], aggregator: Callable[[list[int]], float]
+) -> tuple[int, int]:
+    """Aggregate heights and widths independently; returns (width, height)."""
+    rows, cols = ([image.shape[axis] for image in images] for axis in (0, 1))
+    return round(aggregator(cols)), round(aggregator(rows))
+
+
+# Maps each supported `tile_scaling` mode to its (width, height) shape reducer.
+SHAPE_AGGREGATION_FUN = {
+    mode: partial(_calculate_aggregated_images_shape, aggregator=reducer)
+    for mode, reducer in (("min", np.min), ("max", np.max), ("avg", np.average))
+}
+
+
+def _aggregate_images_shape(
+    images: list[np.ndarray], mode: Literal["min", "max", "avg"]
+) -> tuple[int, int]:
+    """Return the (width, height) tile size reduced from `images` using `mode`."""
+    if mode in SHAPE_AGGREGATION_FUN:
+        return SHAPE_AGGREGATION_FUN[mode](images)
+    supported_modes = list(SHAPE_AGGREGATION_FUN.keys())
+    message = f"Could not aggregate images shape - provided unknown mode: {mode}. "
+    raise ValueError(f"{message}Supported modes: {supported_modes}.")
+
+
+def _establish_grid_size(
+    images: list[np.ndarray], grid_size: tuple[int | None, int | None] | None
+) -> tuple[int, int]:
+    """Resolve (n_rows, n_cols), deriving a missing side from len(images)."""
+    if grid_size is None or all(side is None for side in grid_size):
+        return _negotiate_grid_size(images=images)
+    rows, columns = grid_size[0], grid_size[1]
+    if rows is None:
+        return math.ceil(len(images) / columns), columns
+    return (rows, math.ceil(len(images) / rows)) if columns is None else grid_size
+
+
+def _negotiate_grid_size(images: list[np.ndarray]) -> tuple[int, int]:
+    """Choose a near-square (n_rows, n_cols) grid without empty trailing rows."""
+    images_count = len(images)
+    if images_count <= MAX_COLUMNS_FOR_SINGLE_ROW_GRID:
+        # Few images are laid out side by side in a single row.
+        return 1, images_count
+    columns = math.ceil(np.sqrt(images_count))
+    # Fewest rows that fit every image, i.e. all empty trailing rows trimmed.
+    return -(-images_count // columns), columns
+
+
+def _generate_tiles(
+    images: list[np.ndarray],
+    grid_size: tuple[int, int],
+    single_tile_size: tuple[int, int],
+    tile_padding_color: tuple[int, int, int],
+    tile_margin: int,
+    tile_margin_color: tuple[int, int, int],
+    titles: list[str | None] | None,
+    titles_anchors: list[Point | None],
+    titles_color: tuple[int, int, int],
+    titles_scale: float | None,
+    titles_thickness: int,
+    titles_padding: int,
+    titles_text_font: int,
+    titles_background_color: tuple[int, int, int],
+    default_title_placement: RelativePosition,
+) -> np.ndarray:
+    """Draw titles, pad the grid with blank tiles and merge it into one image."""
+    titled_images = _draw_texts(
+        images=images,
+        titles=titles,
+        titles_anchors=titles_anchors,
+        titles_color=titles_color,
+        titles_scale=titles_scale,
+        titles_thickness=titles_thickness,
+        titles_padding=titles_padding,
+        titles_text_font=titles_text_font,
+        titles_background_color=titles_background_color,
+        default_title_placement=default_title_placement,
+    )
+    rows, columns = grid_size
+    grid = list(create_batches(sequence=titled_images, batch_size=columns))
+    make_blank_tile = partial(
+        _generate_color_image, shape=single_tile_size, color=tile_padding_color
+    )
+    # Complete the last row first, then add rows made purely of blank tiles.
+    for _ in range(columns - len(grid[-1])):
+        grid[-1].append(make_blank_tile())
+    for _ in range(rows - len(grid)):
+        grid.append([make_blank_tile()] * columns)
+    return _merge_tiles_elements(
+        tiles_elements=grid,
+        grid_size=grid_size,
+        single_tile_size=single_tile_size,
+        tile_margin=tile_margin,
+        tile_margin_color=tile_margin_color,
+    )
+
+
+def _draw_texts(
+    images: list[np.ndarray],
+    titles: list[str | None] | None,
+    titles_anchors: list[Point | None],
+    titles_color: tuple[int, int, int],
+    titles_scale: float | None,
+    titles_thickness: int,
+    titles_padding: int,
+    titles_text_font: int,
+    titles_background_color: tuple[int, int, int],
+    default_title_placement: RelativePosition,
+) -> list[np.ndarray]:
+    """Draw every non-None title onto its tile and return the resulting tiles."""
+    if titles is None:
+        return images
+    anchors = _prepare_default_titles_anchors(
+        images=images,
+        titles_anchors=titles_anchors,
+        default_title_placement=default_title_placement,
+    )
+    if titles_scale is None:
+        tile_height, tile_width = images[0].shape[:2]
+        titles_scale = calculate_optimal_text_scale(
+            resolution_wh=(tile_width, tile_height)
+        )
+
+    def _render_title(tile, text, anchor):  # tiles without a title pass through
+        if text is None:
+            return tile
+        return draw_text(
+            scene=tile,
+            text=text,
+            text_anchor=anchor,
+            text_color=Color.from_bgr_tuple(titles_color),
+            text_scale=titles_scale,
+            text_thickness=titles_thickness,
+            text_padding=titles_padding,
+            text_font=titles_text_font,
+            background_color=Color.from_bgr_tuple(titles_background_color),
+        )
+
+    return [_render_title(*triple) for triple in zip(images, titles, anchors)]
+
+
+def _prepare_default_titles_anchors(
+    images: list[np.ndarray],
+    titles_anchors: list[Point | None],
+    default_title_placement: RelativePosition,
+) -> list[Point]:
+    """Resolve one anchor per tile, keeping the explicitly provided ones.
+
+    Missing anchors sit mid-width, at 10% ("top") or else 90% of tile height.
+    """
+    vertical_ratio = 0.1 if default_title_placement == "top" else 0.9
+    anchors = []
+    for image, anchor in zip(images, titles_anchors):
+        if anchor is None:
+            tile_height, tile_width = image.shape[:2]
+            anchor = Point(x=tile_width / 2, y=tile_height * vertical_ratio)
+        anchors.append(anchor)
+    return anchors
+
+
+def _merge_tiles_elements(
+    tiles_elements: list[list[np.ndarray]],
+    grid_size: tuple[int, int],
+    single_tile_size: tuple[int, int],
+    tile_margin: int,
+    tile_margin_color: tuple[int, int, int],
+) -> np.ndarray:
+    """Concatenate a grid of equally sized tiles into a single uint8 image.
+
+    Args:
+        tiles_elements: Rows of tiles, each expected to be `single_tile_size`.
+        grid_size: Grid shape as (n_rows, n_cols); only n_cols is read here.
+        single_tile_size: Tile size as (width, height).
+        tile_margin: Gap between neighbouring tiles, in pixels.
+        tile_margin_color: BGR color of the gap.
+
+    Returns:
+        The merged mosaic, with gaps only between tiles (none on the border).
+    """
+    columns = grid_size[1]
+    # uint8 gaps keep uint8 tiles from being promoted to float64 while merging.
+    column_gap = np.full(
+        (single_tile_size[1], tile_margin, 3), tile_margin_color, dtype=np.uint8
+    )
+    merged_rows = []
+    for row in tiles_elements:
+        pieces = [part for tile in row[:columns] for part in (tile, column_gap)]
+        merged_rows.append(np.concatenate(pieces[:-1], axis=1))
+    row_gap = np.full(
+        (tile_margin, merged_rows[0].shape[1], 3), tile_margin_color, dtype=np.uint8
+    )
+    stacked = [part for merged in merged_rows for part in (merged, row_gap)]
+    return np.concatenate(stacked[:-1], axis=0).astype(np.uint8)
+
+
+def _generate_color_image(
+    shape: tuple[int, int], color: tuple[int, int, int]  # (width, height), BGR
+) -> np.ndarray:
+    return np.full((*shape[::-1], 3), color, dtype=np.uint8)  # solid uint8 tile