Merge pull request #7 from HMellor/add-pre-commit

hmellor · web-flow · commit a126fe8921ac · 2023-09-15T08:19:27.000+01:00
Add pre-commit and do some formatting
diff --git a/.gitignore b/.gitignore
@@ -90,18 +90,8 @@ ENV/
 env.bak/
 venv.bak/
 
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
+# VS Code workspace settings
+.vscode
 
 # Dataset directories
 VOCdevkit
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,15 @@
+repos:
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.4.0
+    hooks:
+    -   id: check-yaml
+    -   id: end-of-file-fixer
+    -   id: trailing-whitespace
+-   repo: https://github.com/psf/black
+    rev: 23.9.1
+    hooks:
+    -   id: black
+-   repo: https://github.com/PyCQA/isort
+    rev: 5.12.0
+    hooks:
+    -   id: isort
diff --git a/README.md b/README.md
@@ -1,20 +1,26 @@
 # PyTorch Superpixels
+
 - [Why use superpixels?](#why-use-superpixels)
 - [Example usage](#example-usage)
+
 ## Why use superpixels?
+
 Dimensionality reduction allows for the use of simpler networks or more complex objectives. A common way of doing this is to simply downsample the images so that there are fewer pixels to contend with. However, this is a lossy operation so detail (and therefore the upper bound on experimental results) is reduced.
 
 Superpixels slightly alleviate this problem because they are able to encode information about edges within themselves. Generating superpixels is an unsupervised clustering operation. Whilst there are already clustering packages written for Python (some of which this project depends on), they all operate with NumPy arrays. This means that they cannot take advantage of GPU acceleration in the way that PyTorch tensors can.
 
 The aim of this project is to bridge the gap between these existing packages and PyTorch so that superpixels can be readily used as an alternative to pixels in various machine learning experiments.
+
 ## Example usage
+
 Here is some example code that uses superpixels for semantic segmentation.
+
 ```
 # Generate list of filenames from your dataset
-imageList = pytorch_superpixels.list_loader.image_list(
+image_list = pytorch_superpixels.list_loader.ImageList(
     'pascal-seg', './VOCdevkit/VOC2012', 'trainval')
 # Use this list to create and save 100 superpixel dataset
-pytorch_superpixels.preprocess.create_masks(imageList, 100)
+pytorch_superpixels.preprocess.create_masks(image_list, 100)
 
 # -----------------------------------------------
 # code that sets up model, optimizer, dataloader, metrics, etc.
@@ -48,7 +54,8 @@ for (images, labels, masks) in trainloader:
         loss.backward()
         optimizer.step()
 ```
-_______________________________________
+
+---
 
 This project stems from a module I created for use in my master's thesis.
 
diff --git a/example.py b/example.py
@@ -0,0 +1,97 @@
+from numpy.core.fromnumeric import product
+from skimage.segmentation.boundaries import find_boundaries
+import torch
+import numpy as np
+from torchvision.io import read_image
+from torchvision.models.segmentation import fcn_resnet50
+import matplotlib.pyplot as plt
+from torchvision.transforms.functional import convert_image_dtype
+from torchvision.utils import draw_segmentation_masks
+from torchvision.utils import make_grid
+from pytorch_superpixels.runtime import superpixelise
+from skimage.segmentation import slic, mark_boundaries, find_boundaries
+from pathlib import Path
+from multiprocessing import Pool
+from os import cpu_count
+from functools import partial
+
+import torchvision.transforms.functional as F
+
+def show(imgs):
+    """Display one image tensor, or a list of image tensors, side by side in a single row."""
+    if not isinstance(imgs, list):
+        imgs = [imgs]
+    _, axs = plt.subplots(ncols=len(imgs), squeeze=False)
+    for col, tensor in enumerate(imgs):
+        pil_image = F.to_pil_image(tensor.detach())
+        axs[0, col].imshow(np.asarray(pil_image))
+        axs[0, col].set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])
+    plt.tight_layout()
+    plt.show()
+
+
+if __name__ == "__main__":
+    sem_classes = [
+        '__background__', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
+        'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
+        'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
+    ]
+    sem_class_to_idx = {cls: idx for (idx, cls) in enumerate(sem_classes)}
+    image_dims = [420, 640]
+    # load every JPEG in ./data and centre-crop to a common size so they can be stacked
+    images = [read_image(str(img)) for img in Path("data").glob("*.jpg")]
+    images = [F.center_crop(image, image_dims) for image in images]

+    batch_int = torch.stack(images)
+    batch = convert_image_dtype(batch_int, dtype=torch.float)

+    # leave one core free, but never request fewer than one worker process
+    with Pool(processes=max(1, (cpu_count() or 1) - 1)) as pool:
+        # re-order axes because slic expects the last dimension to be channel
+        args = [x.permute(1,2,0) for x in batch]
+        # 100 segments
+        kwargs = {"n_segments":100, "start_label":0, "slic_zero":True}
+        func = partial(slic, **kwargs)
+        masks_100sp = pool.map(func, args)
+        # 1000 segments
+        kwargs["n_segments"] = 1000
+        func = partial(slic, **kwargs)
+        masks_1000sp = pool.map(func, args)

+    model = fcn_resnet50(pretrained=True, progress=False)
+    model = model.eval()

+    normalized_batch = F.normalize(batch, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
+    # the pretrained weights expect normalised input, so feed normalized_batch (not batch)
+    outputs = model(normalized_batch)['out']

+    normalized_masks = torch.nn.functional.softmax(outputs, dim=1)
+    num_classes = normalized_masks.shape[1]

+    def generate_all_class_masks(outputs, masks, index):
+        """Return one boolean mask per class for image `index`, after superpixelising outputs."""
+        masks = torch.from_numpy(np.stack(masks))
+        outputs_sp = superpixelise(outputs, masks)
+        normalized_masks_sp = torch.nn.functional.softmax(outputs_sp, dim=1)
+        return normalized_masks_sp[index].argmax(0) == torch.arange(num_classes)[:, None, None]

+    to_show = []
+    for i, image in enumerate(images):
+        # before
+        all_classes_masks = normalized_masks[i].argmax(0) == torch.arange(num_classes)[:, None, None]
+        to_show.append(draw_segmentation_masks(image, masks=all_classes_masks, alpha=.6))
+        # after 100
+        all_classes_masks_sp = generate_all_class_masks(outputs, masks_100sp, i)
+        to_show.append(draw_segmentation_masks(image, masks=all_classes_masks_sp, alpha=.6))
+        # show superpixel boundaries
+        boundaries = find_boundaries(masks_100sp[i])
+        to_show[-1][0:2, boundaries] = 255
+        to_show[-1][2, boundaries] = 0
+        # after 1000
+        all_classes_masks_sp = generate_all_class_masks(outputs, masks_1000sp, i)
+        to_show.append(draw_segmentation_masks(image, masks=all_classes_masks_sp, alpha=.6))
+        # show superpixel boundaries
+        boundaries = find_boundaries(masks_1000sp[i])
+        to_show[-1][0:2, boundaries] = 255
+        to_show[-1][2, boundaries] = 0
+    show(make_grid(to_show, nrow=6))
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,2 @@
+[tool.isort]
+profile = "black"
diff --git a/pytorch_superpixels/list_loader.py b/pytorch_superpixels/list_loader.py
@@ -1,21 +1,23 @@
-from os.path import join
-from os.path import exists
+from os.path import exists, join
 
 
-class image_list:
-    def __init__(self, dataset, path, split='trainval'):
+class ImageList:
+    def __init__(self, dataset, path, split="trainval"):
         # Configured datasets
-        datasets = {'pascal-seg': {'listPath': 'ImageSets/Segmentation/',
-                                   'imagePath': 'JPEGImages',
-                                   'targetPath': 'SegmentationClass'}
-                    }
+        datasets = {
+            "pascal-seg": {
+                "listPath": "ImageSets/Segmentation/",
+                "imagePath": "JPEGImages",
+                "targetPath": "SegmentationClass",
+            }
+        }
         # Object variables
         self.split = split
         self.dataset = dataset
         self.path = path
-        self.listPath = join(path, datasets[dataset]['listPath'])
-        self.imagePath = join(path, datasets[dataset]['imagePath'])
-        self.targetPath = join(path, datasets[dataset]['targetPath'])
+        self.listPath = join(path, datasets[dataset]["listPath"])
+        self.imagePath = join(path, datasets[dataset]["imagePath"])
+        self.targetPath = join(path, datasets[dataset]["targetPath"])
         self.list = []
         # Does the split exist?
         list_path = join(self.listPath, self.split + ".txt")
diff --git a/pytorch_superpixels/metrics.py b/pytorch_superpixels/metrics.py
@@ -1,6 +1,7 @@
+from os.path import join
+
 import torch
 from tqdm import tqdm
-from os.path import join
 
 
 def mask_accuracy(target, mask):
@@ -18,7 +19,7 @@ def mask_accuracy(target, mask):
 def dataset_accuracy(superpixels):
     # Generate image list
     if superpixels is not None:
-        image_list = get_image_list('trainval_super')
+        image_list = get_image_list("trainval_super")
     else:
         image_list = get_image_list()
 
@@ -56,10 +57,7 @@ def find_usable_images(split, superpixels):
     # Generate image list
     image_list = get_image_list(split)
     usable = []
-    target_dir = join(
-        root,
-        "SegmentationClass/pre_encoded_{}_sp".format(superpixels)
-    )
+    target_dir = join(root, "SegmentationClass/pre_encoded_{}_sp".format(superpixels))
     for image_number in image_list:
         target_name = image_number + ".pt"
         target_path = join(target_dir, target_name)
@@ -83,7 +81,7 @@ def fix_broken_images(superpixels):
 def find_size_variance(superpixels):
     # Generate image list
     if superpixels is not None:
-        image_list = get_image_list('trainval_super')
+        image_list = get_image_list("trainval_super")
     else:
         image_list = get_image_list()
     mask_dir = "SegmentationClass/{}_sp".format(superpixels)
diff --git a/pytorch_superpixels/preprocess.py b/pytorch_superpixels/preprocess.py
@@ -1,63 +1,64 @@
+from multiprocessing import cpu_count
+from os import mkdir
+from os.path import exists, join
+
+import torch
+from joblib import Parallel, delayed
 from skimage.io import imread
 from skimage.segmentation import slic
 from skimage.util import img_as_float
-from multiprocessing import cpu_count
-from joblib import Parallel, delayed
-from os.path import exists
-from os.path import join
 from tqdm import tqdm
-from os import mkdir
-import torch
 
 
-def create_masks(imageList, numSegments=100, limOverseg=None):
+def create_masks(image_list, num_segments=100, oversegmentation_limit=None):
     # Save mask and target for image number
     def save_mask(image_number):
         # Load image/target pair
-        image_path = join(imageList.imagePath, image_number + ".jpg")
-        target_path = join(imageList.targetPath, image_number + ".png")
+        image_path = join(image_list.imagePath, image_number + ".jpg")
+        target_path = join(image_list.targetPath, image_number + ".png")
         image = img_as_float(imread(image_path))
         target = imread(target_path)
         target = torch.from_numpy(target)
         # Save paths
-        saveDir = join(imageList.path, 'SuperPixels')
-        maskDir = join(saveDir, '{}_sp_mask'.format(numSegments))
-        targetDir = join(saveDir, '{}_sp_target'.format(numSegments))
+        save_dir = join(image_list.path, "SuperPixels")
+        mask_dir = join(save_dir, "{}_sp_mask".format(num_segments))
+        targetDir = join(save_dir, "{}_sp_target".format(num_segments))
         # Check that directories exist
-        if not exists(saveDir):
-            mkdir(saveDir)
-        if not exists(maskDir):
-            mkdir(maskDir)
+        if not exists(save_dir):
+            mkdir(save_dir)
+        if not exists(mask_dir):
+            mkdir(mask_dir)
         if not exists(targetDir):
             mkdir(targetDir)
         # Define save paths
-        mask_save_path = join(maskDir, image_number + ".pt")
+        mask_save_path = join(mask_dir, image_number + ".pt")
         target_save_path = join(targetDir, image_number + ".pt")
         # If they haven't already been made, make them
         if not exists(mask_save_path) and not exists(target_save_path):
             # Create mask for image/target pair
             mask, target_s = create_mask(
                 image=image,
                 target=target,
-                numSegments=numSegments,
-                limOverseg=limOverseg
+                num_segments=num_segments,
+                oversegmentation_limit=oversegmentation_limit,
             )
             torch.save(mask, mask_save_path)
             torch.save(target_s, target_save_path)
 
     num_cores = cpu_count()
-    inputs = tqdm(imageList.list)
+    inputs = tqdm(image_list.list)
     # Iterate through all images utilising all CPU cores
-    Parallel(n_jobs=num_cores)(delayed(save_mask)(image_number)
-                               for image_number in inputs)
+    Parallel(n_jobs=num_cores)(
+        delayed(save_mask)(image_number) for image_number in inputs
+    )
 
 
-def create_mask(image, target, numSegments, limOverseg):
+def create_mask(image, target, num_segments, oversegmentation_limit):
     # Perform SLIC segmentation
-    mask = slic(image, n_segments=numSegments, slic_zero=True)
+    mask = slic(image, n_segments=num_segments, slic_zero=True)
     mask = torch.from_numpy(mask)
 
-    if limOverseg is not None:
+    if oversegmentation_limit is not None:
         # Oversegmentation step
         superpixels = mask.unique().numel()
         overseg = superpixels
@@ -78,15 +79,16 @@ def create_mask(image, target, numSegments, limOverseg):
                 # Find minority class in superpixel
                 min_class = min(class_hist)
                 # Is the minority class large enough for oversegmentation
-                above_threshold = min_class > class_hist.sum() * limOverseg
+                above_threshold = min_class > class_hist.sum() * oversegmentation_limit
                 if above_threshold:
                     # Leaving one class in supperpixel be
                     for c in classes[1:]:
                         # Adding to the oversegmentation offset
                         overseg += 1
                         # Add offset to class c in the mask
-                        mask[segment_mask] += (target[segment_mask]
-                                               == c).long() * overseg
+                        mask[segment_mask] += (
+                            target[segment_mask] == c
+                        ).long() * overseg
 
     # (Re)define how many superpixels there are and create target_s
     superpixels = mask.unique().numel()
diff --git a/pytorch_superpixels/runtime.py b/pytorch_superpixels/runtime.py
diff --git a/setup.cfg b/setup.cfg
diff --git a/tests/test.py b/tests/test.py