|
1 | 1 | from __future__ import annotations |
2 | 2 |
|
| 3 | +import itertools |
| 4 | +import math |
3 | 5 | import os |
4 | 6 | import shutil |
5 | | -from typing import Any, Literal |
| 7 | +from collections.abc import Callable |
| 8 | +from functools import partial |
| 9 | +from typing import Literal |
6 | 10 |
|
7 | 11 | import cv2 |
8 | 12 | import numpy as np |
|
12 | 16 |
|
13 | 17 | from supervision.annotators.base import ImageType |
14 | 18 | from supervision.draw.color import Color, unify_to_bgr |
| 19 | +from supervision.draw.utils import calculate_optimal_text_scale, draw_text |
15 | 20 | from supervision.geometry.core import Point |
16 | 21 | from supervision.utils.conversion import ( |
| 22 | + cv2_to_pillow, |
17 | 23 | ensure_cv2_image_for_standalone_function, |
| 24 | + images_to_cv2, |
18 | 25 | ) |
| 26 | +from supervision.utils.iterables import create_batches, fill |
| 27 | + |
| 28 | +RelativePosition = Literal["top", "bottom"] |
| 29 | + |
| 30 | +MAX_COLUMNS_FOR_SINGLE_ROW_GRID = 3 |
19 | 31 |
|
20 | 32 |
|
21 | 33 | def crop_image( |
@@ -654,6 +666,336 @@ def create_tiles( |
654 | 666 | titles_padding: int = 10, |
655 | 667 | titles_text_font: int = cv2.FONT_HERSHEY_SIMPLEX, |
656 | 668 | titles_background_color: tuple[int, int, int] | Color = Color.from_hex("#D9D9D9"), |
657 | | - default_title_placement: Any = "top", |
| 669 | + default_title_placement: RelativePosition = "top", |
658 | 670 | ) -> ImageType: |
659 | | - return None |
| 671 | + """ |
| 672 | + Creates tiles mosaic from input images, automating grid placement and |
| 673 | + converting images to common resolution maintaining aspect ratio. It is |
| 674 | + also possible to render text titles on tiles, using optional set of |
| 675 | + parameters specifying text drawing (see parameters description). |
| 676 | +
|
| 677 | + Automated grid placement will try to maintain square shape of grid |
| 678 | + (with size being the nearest integer square root of #images), up to two exceptions: |
| 679 | + * if there are up to 3 images - images will be displayed in single row |
| 680 | + * if square-grid placement causes last row to be empty - number of rows is trimmed |
| 681 | + until last row has at least one image |
| 682 | +
|
| 683 | + Args: |
| 684 | + images (List[ImageType]): Images to create tiles. Elements can be either |
| 685 | + np.ndarray or PIL.Image, common representation will be agreed by the |
| 686 | + function. |
| 687 | + grid_size (Optional[Tuple[Optional[int], Optional[int]]]): Expected grid |
| 688 | + size in format (n_rows, n_cols). If not given - automated grid placement |
| 689 | + will be applied. One may also provide only one out of two elements of the |
| 690 | + tuple - then grid will be created with either n_rows or n_cols fixed, |
| 691 | + leaving the other dimension to be adjusted by the number of images |
| 692 | + single_tile_size (Optional[Tuple[int, int]]): size of a single tile element |
| 693 | + provided in (width, height) format. If not given - size of tile will be |
| 694 | + automatically calculated based on `tile_scaling` parameter. |
| 695 | + tile_scaling (Literal["min", "max", "avg"]): If `single_tile_size` is not |
| 696 | + given - parameter will be used to calculate tile size - using |
| 697 | + min / max / avg size of image provided in `images` list. |
| 698 | + tile_padding_color (Union[Tuple[int, int, int], sv.Color]): Color to be used in |
| 699 | + images letterbox procedure (while standardising tiles sizes) as a padding. |
| 700 | + If tuple provided - should be BGR. |
| 701 | + tile_margin (int): size of margin between tiles (in pixels) |
| 702 | + tile_margin_color (Union[Tuple[int, int, int], sv.Color]): Color of tile margin. |
| 703 | + If tuple provided - should be BGR. |
| 704 | + return_type (Literal["auto", "cv2", "pillow"]): Parameter dictates the format of |
| 705 | + return image. One may choose specific type ("cv2" or "pillow") to enforce |
| 706 | + conversion. "auto" mode takes a majority vote between types of elements in |
| 707 | + `images` list - resolving draws in favour of OpenCV format. "auto" can be |
| 708 | + safely used when all input images are of the same type. |
| 709 | + titles (Optional[List[Optional[str]]]): Optional titles to be added to tiles. |
| 710 | + Elements of that list may be empty - then specific tile (in order presented |
| 711 | + in `images` parameter) will not be filled with title. It is possible to |
| 712 | + provide list of titles shorter than `images` - then remaining titles will |
| 713 | + be assumed empty. |
| 714 | + titles_anchors (Optional[Union[Point, List[Optional[Point]]]]): Parameter to |
| 715 | + specify anchor points for titles. It is possible to specify anchor either |
| 716 | + globally or for specific tiles (following order of `images`). |
| 717 | + If not given (either globally, or for specific element of the list), |
| 718 | + it will be calculated automatically based on `default_title_placement`. |
| 719 | + titles_color (Union[Tuple[int, int, int], Color]): Color of titles text. |
| 720 | + If tuple provided - should be BGR. |
| 721 | + titles_scale (Optional[float]): Scale of titles. If not provided - value will |
| 722 | + be calculated using `calculate_optimal_text_scale(...)`. |
| 723 | + titles_thickness (int): Thickness of titles text. |
| 724 | + titles_padding (int): Size of titles padding. |
| 725 | + titles_text_font (int): Font to be used to render titles. Must be integer |
| 726 | + constant representing OpenCV font. |
| 727 | + (See docs: https://docs.opencv.org/4.x/d6/d6e/group__imgproc__draw.html) |
| 728 | + titles_background_color (Union[Tuple[int, int, int], Color]): Color of title |
| 729 | + text padding. |
| 730 | + default_title_placement (Literal["top", "bottom"]): Parameter specifies title |
| 731 | + anchor placement in case if explicit anchor is not provided. |
| 732 | +
|
| 733 | + Returns: |
| 734 | + ImageType: Image with all input images located in tiles grid. The output type is |
| 735 | + determined by `return_type` parameter. |
| 736 | +
|
| 737 | + Raises: |
| 738 | + ValueError: In case when input images list is empty, provided `grid_size` is too |
| 739 | + small to fit all images, `tile_scaling` mode is invalid. |
| 740 | + """ |
| 741 | + if len(images) == 0: |
| 742 | + raise ValueError("Could not create image tiles from empty list of images.") |
| 743 | + if return_type == "auto": |
| 744 | + return_type = _negotiate_tiles_format(images=images) |
| 745 | + tile_padding_color = unify_to_bgr(color=tile_padding_color) |
| 746 | + tile_margin_color = unify_to_bgr(color=tile_margin_color) |
| 747 | + images = images_to_cv2(images=images) |
| 748 | + if single_tile_size is None: |
| 749 | + single_tile_size = _aggregate_images_shape(images=images, mode=tile_scaling) |
| 750 | + resized_images = [ |
| 751 | + letterbox_image( |
| 752 | + image=i, resolution_wh=single_tile_size, color=tile_padding_color |
| 753 | + ) |
| 754 | + for i in images |
| 755 | + ] |
| 756 | + grid_size = _establish_grid_size(images=images, grid_size=grid_size) |
| 757 | + if len(images) > grid_size[0] * grid_size[1]: |
| 758 | + raise ValueError( |
| 759 | + f"Could not place {len(images)} in grid with size: {grid_size}." |
| 760 | + ) |
| 761 | + if titles is not None: |
| 762 | + titles = fill(sequence=titles, desired_size=len(images), content=None) |
| 763 | + titles_anchors = ( |
| 764 | + [titles_anchors] |
| 765 | + if not issubclass(type(titles_anchors), list) |
| 766 | + else titles_anchors |
| 767 | + ) |
| 768 | + titles_anchors = fill( |
| 769 | + sequence=titles_anchors, desired_size=len(images), content=None |
| 770 | + ) |
| 771 | + titles_color = unify_to_bgr(color=titles_color) |
| 772 | + titles_background_color = unify_to_bgr(color=titles_background_color) |
| 773 | + tiles = _generate_tiles( |
| 774 | + images=resized_images, |
| 775 | + grid_size=grid_size, |
| 776 | + single_tile_size=single_tile_size, |
| 777 | + tile_padding_color=tile_padding_color, |
| 778 | + tile_margin=tile_margin, |
| 779 | + tile_margin_color=tile_margin_color, |
| 780 | + titles=titles, |
| 781 | + titles_anchors=titles_anchors, |
| 782 | + titles_color=titles_color, |
| 783 | + titles_scale=titles_scale, |
| 784 | + titles_thickness=titles_thickness, |
| 785 | + titles_padding=titles_padding, |
| 786 | + titles_text_font=titles_text_font, |
| 787 | + titles_background_color=titles_background_color, |
| 788 | + default_title_placement=default_title_placement, |
| 789 | + ) |
| 790 | + if return_type == "pillow": |
| 791 | + tiles = cv2_to_pillow(image=tiles) |
| 792 | + return tiles |
| 793 | + |
| 794 | + |
| 795 | +def _negotiate_tiles_format(images: list[ImageType]) -> Literal["cv2", "pillow"]: |
| 796 | + number_of_np_arrays = sum(issubclass(type(i), np.ndarray) for i in images) |
| 797 | + if number_of_np_arrays >= (len(images) // 2): |
| 798 | + return "cv2" |
| 799 | + return "pillow" |
| 800 | + |
| 801 | + |
| 802 | +def _calculate_aggregated_images_shape( |
| 803 | + images: list[np.ndarray], aggregator: Callable[[list[int]], float] |
| 804 | +) -> tuple[int, int]: |
| 805 | + height = round(aggregator([i.shape[0] for i in images])) |
| 806 | + width = round(aggregator([i.shape[1] for i in images])) |
| 807 | + return width, height |
| 808 | + |
| 809 | + |
# Maps a tile scaling mode name to the function computing the common tile
# size: `_calculate_aggregated_images_shape` bound to the matching numpy
# aggregator. Key order is kept as "min", "max", "avg" because the keys are
# listed verbatim in the error message for unknown modes.
SHAPE_AGGREGATION_FUN = {
    mode: partial(_calculate_aggregated_images_shape, aggregator=aggregator)
    for mode, aggregator in (
        ("min", np.min),
        ("max", np.max),
        ("avg", np.average),
    )
}
| 815 | + |
| 816 | + |
def _aggregate_images_shape(
    images: list[np.ndarray], mode: Literal["min", "max", "avg"]
) -> tuple[int, int]:
    """
    Compute a common tile size for `images` using the requested scaling mode.

    Args:
        images (List[np.ndarray]): Images whose sizes are aggregated.
        mode (Literal["min", "max", "avg"]): Key of `SHAPE_AGGREGATION_FUN`
            selecting the aggregation to apply.

    Returns:
        Tuple[int, int]: Whatever the selected aggregation function returns
            for `images`.

    Raises:
        ValueError: If `mode` is not a key of `SHAPE_AGGREGATION_FUN`.
    """
    aggregation = SHAPE_AGGREGATION_FUN.get(mode)
    if aggregation is None:
        raise ValueError(
            f"Could not aggregate images shape - provided unknown mode: {mode}. "
            f"Supported modes: {list(SHAPE_AGGREGATION_FUN.keys())}."
        )
    return aggregation(images)
| 826 | + |
| 827 | + |
| 828 | +def _establish_grid_size( |
| 829 | + images: list[np.ndarray], grid_size: tuple[int | None, int | None] | None |
| 830 | +) -> tuple[int, int]: |
| 831 | + if grid_size is None or all(e is None for e in grid_size): |
| 832 | + return _negotiate_grid_size(images=images) |
| 833 | + if grid_size[0] is None: |
| 834 | + return math.ceil(len(images) / grid_size[1]), grid_size[1] |
| 835 | + if grid_size[1] is None: |
| 836 | + return grid_size[0], math.ceil(len(images) / grid_size[0]) |
| 837 | + return grid_size |
| 838 | + |
| 839 | + |
def _negotiate_grid_size(images: list[np.ndarray]) -> tuple[int, int]:
    """
    Choose a grid automatically based on the number of images.

    Up to `MAX_COLUMNS_FOR_SINGLE_ROW_GRID` images are laid out in one row.
    Otherwise the number of columns is the square root of the image count
    rounded up, and the number of rows is the smallest one that still fits all
    images with that many columns (so the last row is never empty).

    Args:
        images (List[np.ndarray]): Images to be placed; only their count is
            used.

    Returns:
        Tuple[int, int]: Grid size in (n_rows, n_cols) format.
    """
    image_count = len(images)
    if image_count <= MAX_COLUMNS_FOR_SINGLE_ROW_GRID:
        return 1, image_count
    columns = math.ceil(np.sqrt(image_count))
    # Integer ceiling division: the fewest rows able to hold every image,
    # i.e. a square grid with the fully empty trailing rows trimmed off.
    rows = -(-image_count // columns)
    return rows, columns
| 849 | + |
| 850 | + |
def _generate_tiles(
    images: list[np.ndarray],
    grid_size: tuple[int, int],
    single_tile_size: tuple[int, int],
    tile_padding_color: tuple[int, int, int],
    tile_margin: int,
    tile_margin_color: tuple[int, int, int],
    titles: list[str | None] | None,
    titles_anchors: list[Point | None],
    titles_color: tuple[int, int, int],
    titles_scale: float | None,
    titles_thickness: int,
    titles_padding: int,
    titles_text_font: int,
    titles_background_color: tuple[int, int, int],
    default_title_placement: RelativePosition,
) -> np.ndarray:
    """
    Draw titles on the images, arrange them in a grid and merge the grid.

    The images are first passed through `_draw_texts`, then split into rows of
    `grid_size[1]` elements. The last row and any missing rows are completed
    with blank tiles produced by `_generate_color_image` using
    `tile_padding_color`, and the grid is finally handed over to
    `_merge_tiles_elements`.

    Args:
        images (List[np.ndarray]): Images to be placed in the grid.
        grid_size (Tuple[int, int]): Grid size in (n_rows, n_cols) format.
        single_tile_size (Tuple[int, int]): Tile size forwarded to the blank
            tile generator and to the merging step.
        tile_padding_color (Tuple[int, int, int]): Color of the blank tiles.
        tile_margin (int): Margin size forwarded to the merging step.
        tile_margin_color (Tuple[int, int, int]): Margin color forwarded to
            the merging step.
        titles, titles_anchors, titles_color, titles_scale, titles_thickness,
        titles_padding, titles_text_font, titles_background_color,
        default_title_placement: Forwarded unchanged to `_draw_texts`.

    Returns:
        np.ndarray: Result of `_merge_tiles_elements` for the completed grid.
    """
    titled_images = _draw_texts(
        images=images,
        titles=titles,
        titles_anchors=titles_anchors,
        titles_color=titles_color,
        titles_scale=titles_scale,
        titles_thickness=titles_thickness,
        titles_padding=titles_padding,
        titles_text_font=titles_text_font,
        titles_background_color=titles_background_color,
        default_title_placement=default_title_placement,
    )
    rows, columns = grid_size

    def blank_tile() -> np.ndarray:
        return _generate_color_image(shape=single_tile_size, color=tile_padding_color)

    grid = list(create_batches(sequence=titled_images, batch_size=columns))
    # Complete the (possibly partial) last row with individual blank tiles.
    last_row = grid[-1]
    for _ in range(columns - len(last_row)):
        last_row.append(blank_tile())
    # Add fully blank rows until the requested number of rows is reached;
    # a single blank tile is repeated across each such row.
    for _ in range(rows - len(grid)):
        grid.append([blank_tile()] * columns)
    return _merge_tiles_elements(
        tiles_elements=grid,
        grid_size=grid_size,
        single_tile_size=single_tile_size,
        tile_margin=tile_margin,
        tile_margin_color=tile_margin_color,
    )
| 898 | + |
| 899 | + |
def _draw_texts(
    images: list[np.ndarray],
    titles: list[str | None] | None,
    titles_anchors: list[Point | None],
    titles_color: tuple[int, int, int],
    titles_scale: float | None,
    titles_thickness: int,
    titles_padding: int,
    titles_text_font: int,
    titles_background_color: tuple[int, int, int],
    default_title_placement: RelativePosition,
) -> list[np.ndarray]:
    """
    Render the titles on the corresponding images.

    Args:
        images (List[np.ndarray]): Images to draw on.
        titles (Optional[List[Optional[str]]]): Titles matched with `images`
            by position. When the whole list is `None`, `images` is returned
            untouched; a `None` element leaves that single image untouched.
        titles_anchors (List[Optional[Point]]): Anchors matched with `images`
            by position; `None` elements are replaced using
            `_prepare_default_titles_anchors`.
        titles_color (Tuple[int, int, int]): Text color as a BGR tuple.
        titles_scale (Optional[float]): Text scale. When `None`, it is
            obtained from `calculate_optimal_text_scale` for the resolution of
            the first image.
        titles_thickness (int): Text thickness passed to `draw_text`.
        titles_padding (int): Text padding passed to `draw_text`.
        titles_text_font (int): Font passed to `draw_text`.
        titles_background_color (Tuple[int, int, int]): Text background color
            as a BGR tuple.
        default_title_placement (RelativePosition): Placement used for the
            anchors that were not given explicitly.

    Returns:
        List[np.ndarray]: One image per (image, title, anchor) triple - the
            output of `draw_text` where a title was given, the input image
            otherwise.
    """
    if titles is None:
        return images
    anchors = _prepare_default_titles_anchors(
        images=images,
        titles_anchors=titles_anchors,
        default_title_placement=default_title_placement,
    )
    scale = titles_scale
    if scale is None:
        # No explicit scale - derive one from the resolution of the first image.
        first_height, first_width = images[0].shape[:2]
        scale = calculate_optimal_text_scale(
            resolution_wh=(first_width, first_height)
        )

    def render_title(image: np.ndarray, text: str, anchor: Point) -> np.ndarray:
        return draw_text(
            scene=image,
            text=text,
            text_anchor=anchor,
            text_color=Color.from_bgr_tuple(titles_color),
            text_scale=scale,
            text_thickness=titles_thickness,
            text_padding=titles_padding,
            text_font=titles_text_font,
            background_color=Color.from_bgr_tuple(titles_background_color),
        )

    return [
        image if text is None else render_title(image, text, anchor)
        for image, text, anchor in zip(images, titles, anchors)
    ]
| 942 | + |
| 943 | + |
| 944 | +def _prepare_default_titles_anchors( |
| 945 | + images: list[np.ndarray], |
| 946 | + titles_anchors: list[Point | None], |
| 947 | + default_title_placement: RelativePosition, |
| 948 | +) -> list[Point]: |
| 949 | + result = [] |
| 950 | + for image, anchor in zip(images, titles_anchors): |
| 951 | + if anchor is not None: |
| 952 | + result.append(anchor) |
| 953 | + continue |
| 954 | + image_height, image_width = image.shape[:2] |
| 955 | + if default_title_placement == "top": |
| 956 | + default_anchor = Point(x=image_width / 2, y=image_height * 0.1) |
| 957 | + else: |
| 958 | + default_anchor = Point(x=image_width / 2, y=image_height * 0.9) |
| 959 | + result.append(default_anchor) |
| 960 | + return result |
| 961 | + |
| 962 | + |
| 963 | +def _merge_tiles_elements( |
| 964 | + tiles_elements: list[list[np.ndarray]], |
| 965 | + grid_size: tuple[int, int], |
| 966 | + single_tile_size: tuple[int, int], |
| 967 | + tile_margin: int, |
| 968 | + tile_margin_color: tuple[int, int, int], |
| 969 | +) -> np.ndarray: |
| 970 | + vertical_padding = ( |
| 971 | + np.ones((single_tile_size[1], tile_margin, 3)) * tile_margin_color |
| 972 | + ) |
| 973 | + merged_rows = [ |
| 974 | + np.concatenate( |
| 975 | + list( |
| 976 | + itertools.chain.from_iterable( |
| 977 | + zip(row, [vertical_padding] * grid_size[1]) |
| 978 | + ) |
| 979 | + )[:-1], |
| 980 | + axis=1, |
| 981 | + ) |
| 982 | + for row in tiles_elements |
| 983 | + ] |
| 984 | + row_width = merged_rows[0].shape[1] |
| 985 | + horizontal_padding = ( |
| 986 | + np.ones((tile_margin, row_width, 3), dtype=np.uint8) * tile_margin_color |
| 987 | + ) |
| 988 | + rows_with_paddings = [] |
| 989 | + for row in merged_rows: |
| 990 | + rows_with_paddings.append(row) |
| 991 | + rows_with_paddings.append(horizontal_padding) |
| 992 | + return np.concatenate( |
| 993 | + rows_with_paddings[:-1], |
| 994 | + axis=0, |
| 995 | + ).astype(np.uint8) |
| 996 | + |
| 997 | + |
| 998 | +def _generate_color_image( |
| 999 | + shape: tuple[int, int], color: tuple[int, int, int] |
| 1000 | +) -> np.ndarray: |
| 1001 | + return np.ones((*shape[::-1], 3), dtype=np.uint8) * color |
0 commit comments