agcompvis/unet.py at main · zmswanson/agcompvis · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# Code generated with the help of the YouTube video "Implementing original U-Net from scratch
# using PyTorch" by Abhishek Thakur. Thanks for the great video! From there I started modifying
# the base U-Net model and adding supporting modules.

import torch
import torch.nn as nn
from torch.nn.modules.activation import ReLU
# from pytorch_model_summary import summary

def double_conv(in_c, out_c):
    """Build a block of two 3x3 convolutions, each followed by an in-place ReLU.

    Both convolutions use a reflect padding of 1, so the spatial size of the input is
    preserved; only the channel count changes (in_c -> out_c -> out_c).

    Args:
        in_c: number of channels fed into the first convolution.
        out_c: number of channels produced by both convolutions.

    Returns:
        An nn.Sequential laid out as [Conv2d, ReLU, Conv2d, ReLU].
    """
    stages = []
    # The first convolution maps in_c -> out_c, the second keeps out_c -> out_c.
    for stage_in in (in_c, out_c):
        stages.append(
            nn.Conv2d(stage_in, out_c, kernel_size=3, padding=1, padding_mode='reflect')
        )
        stages.append(nn.ReLU(inplace=True))

    return nn.Sequential(*stages)

def crop_and_copy(enc_tensor, dec_tensor):
    """Center-crop an encoder feature map to the decoder's size and concatenate them.

    Height and width are handled independently, and the crop window is exactly as large
    as the decoder tensor, so non-square inputs and odd size differences work as well.

    Args:
        enc_tensor: skip-connection tensor from the encoding side, indexed as
            (batch, channels, height, width).
        dec_tensor: tensor from the decoding side with the same batch size and a
            height/width no larger than enc_tensor's.

    Returns:
        The cropped enc_tensor and dec_tensor concatenated along dim 1, encoder
        channels first.

    Raises:
        ValueError: if dec_tensor is spatially larger than enc_tensor.
    """
    enc_h, enc_w = enc_tensor.shape[2:4]
    dec_h, dec_w = dec_tensor.shape[2:4]

    if enc_h < dec_h or enc_w < dec_w:
        raise ValueError(
            "decoder tensor ({}x{}) is larger than encoder tensor ({}x{})".format(
                dec_h, dec_w, enc_h, enc_w
            )
        )

    # Offsets of the centered window; any odd leftover pixel is dropped from the
    # bottom/right edge so the window always matches the decoder size exactly.
    top = (enc_h - dec_h) // 2
    left = (enc_w - dec_w) // 2
    cropped = enc_tensor[:, :, top:top + dec_h, left:left + dec_w]

    return torch.cat([cropped, dec_tensor], 1)

class UNet(nn.Module):
    """U-Net with a five-level encoder and a four-level decoder joined by skip connections.

    The encoder widens the features 64 -> 128 -> 256 -> 512 -> 1024 with a 2x2 max pool
    between levels; the decoder mirrors it with stride-2 transposed convolutions and
    concatenates the matching encoder output before each double convolution.

    Args:
        in_channels: number of channels of the input image (default 4).
        num_classes: number of channels of the output map (default 9).
    """

    def __init__(self, in_channels=4, num_classes=9):
        super(UNet, self).__init__()

        # Stateless, so one instance is shared by all encoder levels.
        self.max_pool_2x2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.down_conv_1 = double_conv(in_channels, 64)
        self.down_conv_2 = double_conv(64, 128)
        self.down_conv_3 = double_conv(128, 256)
        self.down_conv_4 = double_conv(256, 512)
        self.down_conv_5 = double_conv(512, 1024)

        # Each transposed convolution doubles the spatial size and halves the channels.
        self.up_transp_1 = nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=2, stride=2)
        self.up_transp_2 = nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=2, stride=2)
        self.up_transp_3 = nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=2, stride=2)
        self.up_transp_4 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=2, stride=2)

        # Input channels are doubled relative to the transposed-conv output because the
        # skip connection is concatenated along the channel axis first.
        self.up_conv_1 = double_conv(1024, 512)
        self.up_conv_2 = double_conv(512, 256)
        self.up_conv_3 = double_conv(256, 128)
        self.up_conv_4 = double_conv(128, 64)

        # 1x1 convolution mapping the 64 decoder features to the output channels.
        self.out_conv = nn.Conv2d(in_channels=64, out_channels=num_classes, kernel_size=1, stride=1)

    def forward(self, image):
        """Run the U-Net forward propagation.

        Args:
            image: input tensor indexed as (batch, in_channels, height, width).

        Returns:
            The output map produced by the final 1x1 convolution, with num_classes
            channels.
        """

        # Consider replacing the Encoding layers with a pre-trained model like Resnet trained on
        # ImageNet (this will provide generic feature extraction) and then write a decoder that
        # takes the outputs from pre-trained encoder and extracts the features of interest.

        # Encoder: keep the output of every level before pooling for the skip connections.
        skip_1 = self.down_conv_1(image)
        skip_2 = self.down_conv_2(self.max_pool_2x2(skip_1))
        skip_3 = self.down_conv_3(self.max_pool_2x2(skip_2))
        skip_4 = self.down_conv_4(self.max_pool_2x2(skip_3))

        # Bottleneck (encoding layer 5) has no skip connection.
        bottom = self.down_conv_5(self.max_pool_2x2(skip_4))

        # Decoder: upsample, merge with the matching encoder output, then convolve.
        up = self.up_conv_1(crop_and_copy(skip_4, self.up_transp_1(bottom)))
        up = self.up_conv_2(crop_and_copy(skip_3, self.up_transp_2(up)))
        up = self.up_conv_3(crop_and_copy(skip_2, self.up_transp_3(up)))
        up = self.up_conv_4(crop_and_copy(skip_1, self.up_transp_4(up)))

        # Return the map so callers can compute a loss or predictions from it.
        return self.out_conv(up)


if __name__ == "__main__":
    # Smoke test on a random 4-channel 512 x 512 image. 512 is divisible by 16, so the four
    # 2x2 poolings bring it down to a 32 x 32 bottleneck.
    # Alternative considered: use 572 x 572 images for U-Net, which needs a pipeline stage that
    # grabs the surrounding 60 pixels (original = 512x512) if available or zeros if not.
    # This can be accomplished using the field id and xy coordinates.
    # Otherwise, edge handling has to be implemented in the U-Net model itself.
    image = torch.rand((1, 4, 512, 512))

    unet = UNet()
    # print(summary(unet, image))

    # No gradients are needed for a shape check; skipping autograd saves memory.
    with torch.no_grad():
        output = unet(image)

    # Report the output shape whenever the forward pass hands back a tensor.
    if output is not None:
        print(output.size())