Skip to content

Commit bc12a8e

Browse files
authored
Merge pull request #56 from roboflow/feature/rafactor_of_functions_in_notebooks_package_and_intitial_version_of_voc_xml_support
feature/rafactor_of_functions_in_notebooks_package_and_intitial_version_of_voc_xml_support
2 parents 1fa4a1c + 2be4981 commit bc12a8e

File tree

9 files changed

+232
-18
lines changed

9 files changed

+232
-18
lines changed

docs/annotation/voc.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
## detections_to_voc_xml
2+
3+
:::supervision.annotation.voc.detections_to_voc_xml

docs/changelog.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
### 0.4.0 <small>April 5, 2023</small>
2+
3+
- Added [[#48](https://github.com/roboflow/supervision/discussions/48)]: `Detections.empty` to allow easy creation of empty `Detections` objects.
4+
- Added [[#56](https://github.com/roboflow/supervision/pull/56)]: `Detections.from_roboflow` to allow easy creation of `Detections` objects from Roboflow API inference results.
5+
- Added [[#56](https://github.com/roboflow/supervision/pull/56)]: `plot_images_grid` to allow easy plotting of multiple images on single plot.
6+
- Added [[#56](https://github.com/roboflow/supervision/pull/56)]: initial support for Pascal VOC XML format with `detections_to_voc_xml` method.
7+
- Changed [[#56](https://github.com/roboflow/supervision/pull/56)]: `show_frame_in_notebook` refactored and renamed to `plot_image`.
8+
19
### 0.3.2 <small>March 23, 2023</small>
210

311
- Changed [[#50](https://github.com/roboflow/supervision/issues/50)]: Allow `Detections.class_id` to be `None`.

docs/notebook/utils.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1-
## show_frame_in_notebook
1+
## plot_image
22

3-
:::supervision.notebook.utils.show_frame_in_notebook
3+
:::supervision.notebook.utils.plot_image
4+
5+
## plot_images_grid
6+
7+
:::supervision.notebook.utils.plot_images_grid

mkdocs.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ nav:
3131
- Utils: detection/utils.md
3232
- Draw:
3333
- Utils: draw/utils.md
34+
- Annotations:
35+
- Pascal VOC XML: annotation/voc.md
3436
- Notebook:
3537
- Utils: notebook/utils.md
3638
- Changelog: changelog.md

supervision/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
__version__ = "0.3.2"
1+
__version__ = "0.4.0"
22

3+
from supervision.annotation.voc import detections_to_voc_xml
34
from supervision.detection.annotate import BoxAnnotator
45
from supervision.detection.core import Detections
56
from supervision.detection.line_counter import LineZone, LineZoneAnnotator
@@ -9,7 +10,7 @@
910
from supervision.draw.utils import draw_filled_rectangle, draw_polygon, draw_text
1011
from supervision.geometry.core import Point, Position, Rect
1112
from supervision.geometry.utils import get_polygon_center
12-
from supervision.notebook.utils import show_frame_in_notebook
13+
from supervision.notebook.utils import plot_image, plot_images_grid
1314
from supervision.video import (
1415
VideoInfo,
1516
VideoSink,

supervision/annotation/__init__.py

Whitespace-only changes.

supervision/annotation/voc.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
from typing import List
2+
from xml.dom.minidom import parseString
3+
from xml.etree.ElementTree import Element, SubElement, tostring
4+
5+
from supervision.detection.core import Detections
6+
7+
8+
def detections_to_voc_xml(
    detections: Detections,
    classes: List[str],
    filename: str,
    width: int,
    height: int,
    depth: int = 3,
) -> str:
    """
    Serialize a Detections object into a Pascal VOC XML annotation string.

    Args:
        detections (Detections): Detections whose `xyxy` boxes (and optional
            `class_id` values) are written out as `<object>` entries.
        classes (List[str]): Class names indexed by the ids in `detections.class_id`.
        filename (str): Name of the image file the annotation describes.
        width (int): Image width in pixels.
        height (int): Image height in pixels.
        depth (int, optional): Number of color channels. Defaults to 3 (RGB).

    Returns:
        str: A pretty-printed XML string in Pascal VOC format.

    Examples:
        ```python
        >>> import numpy as np
        >>> import supervision as sv

        >>> xyxy = np.array([
        ...     [50, 30, 200, 180],
        ...     [20, 40, 150, 190]
        ... ])
        >>> class_id = np.array([1, 0])
        >>> detections = sv.Detections(xyxy=xyxy, class_id=class_id)

        >>> classes = ["dog", "cat"]

        >>> voc_xml = sv.detections_to_voc_xml(
        ...     detections=detections,
        ...     classes=classes,
        ...     filename="image1.jpg",
        ...     width=500,
        ...     height=400
        ... )
        ```
    """
    # Root of the VOC document.
    root = Element("annotation")

    SubElement(root, "folder").text = "VOC"
    SubElement(root, "filename").text = filename

    # Provenance block expected by the VOC schema.
    source = SubElement(root, "source")
    SubElement(source, "database").text = "roboflow.ai"

    # Image dimensions.
    size = SubElement(root, "size")
    for tag, value in (("width", width), ("height", height), ("depth", depth)):
        SubElement(size, tag).text = str(value)

    # No segmentation masks are emitted by this converter.
    SubElement(root, "segmented").text = "0"

    # One <object> entry per bounding box.
    for box_index, (x_min, y_min, x_max, y_max) in enumerate(detections.xyxy):
        obj = SubElement(root, "object")

        if detections.class_id is not None:
            label = classes[detections.class_id[box_index]]
        else:
            label = "unknown"
        SubElement(obj, "name").text = label

        bndbox = SubElement(obj, "bndbox")
        for tag, coordinate in (
            ("xmin", x_min),
            ("ymin", y_min),
            ("xmax", x_max),
            ("ymax", y_max),
        ):
            # VOC stores integer pixel coordinates.
            SubElement(bndbox, tag).text = str(int(coordinate))

    return parseString(tostring(root)).toprettyxml(indent=" ")

supervision/detection/core.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from __future__ import annotations
22

33
from dataclasses import dataclass
4-
from typing import Iterator, Optional, Tuple, Union
4+
from typing import Iterator, List, Optional, Tuple, Union
55

66
import numpy as np
77

@@ -176,6 +176,31 @@ def from_detectron2(cls, detectron2_results) -> Detections:
176176
.astype(int),
177177
)
178178

179+
@classmethod
180+
def from_roboflow(cls, roboflow_result: dict, class_list: List[str]) -> Detections:
181+
xyxy = []
182+
confidence = []
183+
class_id = []
184+
185+
for prediction in roboflow_result["predictions"]:
186+
x = prediction["x"]
187+
y = prediction["y"]
188+
width = prediction["width"]
189+
height = prediction["height"]
190+
x_min = x - width / 2
191+
y_min = y - height / 2
192+
x_max = x_min + width
193+
y_max = y_min + height
194+
xyxy.append([x_min, y_min, x_max, y_max])
195+
class_id.append(class_list.index(prediction["class"]))
196+
confidence.append(prediction["confidence"])
197+
198+
return Detections(
199+
xyxy=np.array(xyxy),
200+
confidence=np.array(confidence),
201+
class_id=np.array(class_id).astype(int),
202+
)
203+
179204
@classmethod
180205
def from_coco_annotations(cls, coco_annotation: dict) -> Detections:
181206
xyxy, class_id = [], []
@@ -187,6 +212,14 @@ def from_coco_annotations(cls, coco_annotation: dict) -> Detections:
187212

188213
return cls(xyxy=np.array(xyxy), class_id=np.array(class_id))
189214

215+
@classmethod
216+
def empty(cls) -> Detections:
217+
return cls(
218+
xyxy=np.empty((0, 4), dtype=np.float32),
219+
confidence=np.array([], dtype=np.float32),
220+
class_id=np.array([], dtype=int),
221+
)
222+
190223
def get_anchor_coordinates(self, anchor: Position) -> np.ndarray:
191224
"""
192225
Returns the bounding box coordinates for a specific anchor.

supervision/notebook/utils.py

Lines changed: 72 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,92 @@
1-
from typing import Tuple
1+
from typing import List, Optional, Tuple
22

33
import cv2
44
import matplotlib.pyplot as plt
55
import numpy as np
66

77

8-
def plot_image(
    image: np.ndarray, size: Tuple[int, int] = (10, 10), cmap: Optional[str] = "gray"
) -> None:
    """
    Plots a single image using matplotlib.

    Args:
        image (np.ndarray): The image to be displayed. A 2D array is shown
            with the given colormap; otherwise the array is assumed to be a
            BGR image (OpenCV convention) and is converted to RGB first.
        size (Tuple[int, int]): The size of the plot in inches.
        cmap (Optional[str]): Colormap used for single-channel images only.

    Examples:
        ```python
        >>> import cv2
        >>> import supervision as sv

        >>> image = cv2.imread("path/to/image.jpg")

        %matplotlib inline
        >>> sv.plot_image(image, (16, 16))
        ```
    """
    # The figure setup is identical for both branches; create it once.
    plt.figure(figsize=size)
    if image.ndim == 2:
        plt.imshow(image, cmap=cmap)
    else:
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.show()
37+
38+
39+
def plot_images_grid(
    images: List[np.ndarray],
    grid_size: Tuple[int, int],
    titles: Optional[List[str]] = None,
    size: Tuple[int, int] = (12, 12),
) -> None:
    """
    Plots images in a grid using matplotlib.

    Args:
        images (List[np.ndarray]): A list of images as numpy arrays
            (BGR, OpenCV convention — converted to RGB before display).
        grid_size (Tuple[int, int]): (rows, columns) of the grid.
        titles (Optional[List[str]]): A list of titles for each image.
            Defaults to None.
        size (Tuple[int, int]): Width and height of the entire plot in inches.

    Raises:
        ValueError: If the number of images exceeds the grid capacity.

    Examples:
        ```python
        >>> import cv2
        >>> import supervision as sv

        >>> image1 = cv2.imread("path/to/image1.jpg")
        >>> image2 = cv2.imread("path/to/image2.jpg")
        >>> image3 = cv2.imread("path/to/image3.jpg")

        >>> images = [image1, image2, image3]
        >>> titles = ["Image 1", "Image 2", "Image 3"]

        %matplotlib inline
        >>> sv.plot_images_grid(images, grid_size=(2, 2), titles=titles, size=(16, 16))
        ```
    """
    nrows, ncols = grid_size

    if len(images) > nrows * ncols:
        raise ValueError(
            "The number of images exceeds the grid size. Please increase the grid size or reduce the number of images."
        )

    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=size)

    # plt.subplots returns a bare Axes for a 1x1 grid and a 1-D array for a
    # single row/column; flatten through numpy so iteration works in every case.
    for idx, ax in enumerate(np.asarray(axes).flatten()):
        if idx < len(images):
            ax.imshow(cv2.cvtColor(images[idx], cv2.COLOR_BGR2RGB))
            if titles is not None and idx < len(titles):
                ax.set_title(titles[idx])
        # Hide the axes for every cell, filled or empty.
        ax.axis("off")

    plt.show()

0 commit comments

Comments
 (0)