Added generic MClayer with MCdense, MCconv layers with tests and an example of MC lenet5 on MNIST (#2)

DwaraknathT · DhairyaLGandhi · web-flow · commit b11fbeab49c7 · 2021-08-30T16:07:50.000+05:30
* Added generic MClayer with MCdense, MCconv layers with tests and a single example of MC lenet5 on MNIST

* fixed module import errors and added removed inner constructor for mclayer

* Added statistics dependency and removed MCLayer inner constructor

* Update src/layers/mclayers.jl

reorder arguments to create conv layer

Co-authored-by: Dhairya Gandhi <dhairya@juliacomputing.com>

* Update src/layers/mclayers.jl

Co-authored-by: Dhairya Gandhi <dhairya@juliacomputing.com>

* Refactor MCLayer to allow arbitrary dropout functions by decoupling dropout and MCLayer forward pass

* Added MClayer to the export list of the module

Co-authored-by: Dhairya Gandhi <dhairya@juliacomputing.com>
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -10,9 +10,7 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.0'
           - '1.6'
-          - 'nightly'
         os:
           - ubuntu-latest
         arch:
diff --git a/Project.toml b/Project.toml
@@ -3,6 +3,14 @@ uuid = "f38f59f8-88e0-4e11-81d3-0c37501e3a95"
 authors = ["DwaraknathT <dwarakasharma@gmail.com> and contributors"]
 version = "0.1.0"
 
+[deps]
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+CalibrationErrors = "33913031-fe46-5864-950f-100836f47845"
+Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+ReliabilityDiagrams = "e5f51471-6270-49e4-a15a-f1cfbff4f856"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+
 [compat]
 julia = "1"
 
diff --git a/docs/make.jl b/docs/make.jl
@@ -1,23 +1,24 @@
 using DeepUncertainty
 using Documenter
 
-DocMeta.setdocmeta!(DeepUncertainty, :DocTestSetup, :(using DeepUncertainty); recursive=true)
+DocMeta.setdocmeta!(
+    DeepUncertainty,
+    :DocTestSetup,
+    :(using DeepUncertainty);
+    recursive = true,
+)
 
 makedocs(;
-    modules=[DeepUncertainty],
-    authors="DwaraknathT <dwarakasharma@gmail.com> and contributors",
-    repo="https://github.com/DwaraknathT/DeepUncertainty.jl/blob/{commit}{path}#{line}",
-    sitename="DeepUncertainty.jl",
-    format=Documenter.HTML(;
-        prettyurls=get(ENV, "CI", "false") == "true",
-        canonical="https://DwaraknathT.github.io/DeepUncertainty.jl",
-        assets=String[],
+    modules = [DeepUncertainty],
+    authors = "DwaraknathT <dwarakasharma@gmail.com> and contributors",
+    repo = "https://github.com/aced-differentiate/DeepUncertainty.jl/blob/{commit}{path}#{line}",
+    sitename = "DeepUncertainty.jl",
+    format = Documenter.HTML(;
+        prettyurls = get(ENV, "CI", "false") == "true",
+        canonical = "https://DwaraknathT.github.io/DeepUncertainty.jl",
+        assets = String[],
     ),
-    pages=[
-        "Home" => "index.md",
-    ],
+    pages = ["Home" => "index.md"],
 )
 
-deploydocs(;
-    repo="github.com/DwaraknathT/DeepUncertainty.jl",
-)
+deploydocs(; repo = "github.com/aced-differentiate/DeepUncertainty.jl")
diff --git a/examples/mcdropout.jl b/examples/mcdropout.jl
@@ -0,0 +1,202 @@
+using Base: AbstractFloat
+## Classification of MNIST dataset 
+## with the convolutional neural network known as LeNet5.
+## This script also combines various
+## packages from the Julia ecosystem with Flux.
+using Flux
+using Flux.Data: DataLoader
+using Flux.Optimise: Optimiser, WeightDecay
+using Flux: onehotbatch, onecold, glorot_normal, label_smoothing
+using Flux.Losses: logitcrossentropy
+using Statistics, Random
+using Logging: with_logger
+using ProgressMeter: @showprogress
+import MLDatasets
+using CUDA
+using Formatting
+
+using DeepUncertainty
+
+# LeNet5 "constructor". 
+# The model can be adapted to any image size
+# and any number of output classes.
+function LeNet5(args; imgsize = (28, 28, 1), nclasses = 10)
+    p = args.dropout  # one dropout rate shared by every MC layer below
+    # Flattened feature count fed to the first dense layer (16 output channels).
+    nflat = (imgsize[1] ÷ 4 - 3) * (imgsize[2] ÷ 4 - 3) * 16
+    features = (
+        MCConv((5, 5), imgsize[end] => 6, p, relu), MaxPool((2, 2)),
+        MCConv((5, 5), 6 => 16, p, relu), MaxPool((2, 2)),
+        flatten,
+    )
+    classifier = (
+        MCDense(nflat, 120, p, relu), MCDense(120, 84, p, relu), MCDense(84, nclasses, p),
+    )
+    return Chain(features..., classifier...)
+end
+
+function get_data(args)
+    # Build the MNIST train/test `DataLoader`s for this example; only
+    # `args.batchsize` is read from `args`.
+
+    # Give the images a singleton channel axis and one-hot encode labels over 0:9.
+    prepare(x, y) = (reshape(x, 28, 28, 1, :), onehotbatch(y, 0:9))
+
+    # `traindata` / `testdata` each return an (images, labels) pair, splatted here.
+    train_set = prepare(MLDatasets.MNIST.traindata(Float32)...)
+    test_set = prepare(MLDatasets.MNIST.testdata(Float32)...)
+
+    # Both loaders are created with `partial = false`; only the training
+    # loader shuffles its samples.
+    bs = args.batchsize
+    train_loader = DataLoader(train_set; batchsize = bs, shuffle = true, partial = false)
+    test_loader = DataLoader(test_set; batchsize = bs, partial = false)
+
+    return train_loader, test_loader
+end
+
+loss(ŷ, y) = logitcrossentropy(ŷ, y)  # logit-based: `ŷ` should be raw (pre-softmax) model outputs
+
+# Count (not fraction) of samples whose predicted class equals the label class.
+function accuracy(preds, labels)
+    return sum(onecold(cpu(preds)) .== onecold(cpu(labels)))
+end
+
+function eval_loss_accuracy(args, loader, model, device)
+    # Evaluate `model` on `loader` with `args.sample_size` stochastic forward passes per
+    # batch; log loss, accuracy (%) and ECE for each pass and for their averaged
+    # prediction. Returns `nothing`.
+    nsamples = args.sample_size
+    l = zeros(Float32, nsamples)
+    acc = zeros(Int, nsamples)
+    ece_list = zeros(Float32, nsamples)
+    ntot = 0
+    mean_l, mean_acc, mean_ece = 0.0f0, 0, 0.0f0
+    for (x, y) in loader
+        xd, yd = x |> device, y |> device
+        labels = onecold(yd |> cpu)
+        nbatch = size(yd)[end]
+
+        # One stochastic forward pass per sample; collect the class probabilities.
+        predictions = map(1:nsamples) do ensemble
+            logits = model(xd)
+            probs = softmax(logits, dims = 1)
+            # `loss` is logit-based, so it gets the raw scores: passing `probs`
+            # would apply softmax a second time and misreport the loss.
+            l[ensemble] += loss(logits, yd) * nbatch
+            acc[ensemble] += accuracy(probs, yd)
+            ece_list[ensemble] += ExpectedCalibrationError(probs |> cpu, labels) * nbatch
+            return probs
+        end
+        # Stack the passes along a new trailing axis and average over it.
+        predictions = Flux.batch(predictions)
+        mean_predictions = mean(predictions, dims = ndims(predictions))
+        mean_predictions = dropdims(mean_predictions, dims = ndims(mean_predictions))
+        # The averaged outputs are probabilities, hence plain cross-entropy here.
+        mean_l += Flux.Losses.crossentropy(mean_predictions, yd) * nbatch
+        mean_acc += accuracy(mean_predictions, yd)
+        mean_ece += ExpectedCalibrationError(mean_predictions |> cpu, labels) * nbatch
+        ntot += nbatch
+    end
+    # Normalize by the number of evaluated examples
+    losses = [v / ntot |> round4 for v in l]
+    accs = [a / ntot * 100 |> round4 for a in acc]
+    eces = [v / ntot |> round4 for v in ece_list]
+    mean_l = mean_l / ntot |> round4
+    mean_acc = mean_acc / ntot * 100 |> round4
+    mean_ece = mean_ece / ntot |> round4
+
+    # Print the per-sample loss, accuracy and ECE
+    for ensemble = 1:nsamples
+        @info (format(
+            "Sample {} Loss: {} Accuracy: {} ECE: {}",
+            ensemble,
+            losses[ensemble],
+            accs[ensemble],
+            eces[ensemble],
+        ))
+    end
+    @info (format(
+        "Mean Loss: {} Mean Accuracy: {} Mean ECE: {}",
+        mean_l,
+        mean_acc,
+        mean_ece,
+    ))
+    @info "==========================================================="
+    return nothing
+end
+
+## Helpers: total parameter count of a model, and rounding to 4 digits for log output
+num_params(model) = mapreduce(length, +, Flux.params(model))
+round4(x) = round(x; digits = 4)
+
+# arguments for the `train` function 
+Base.@kwdef mutable struct Args
+    η = 3e-4             # learning rate
+    λ = 0                # L2 regularizer param, implemented as weight decay
+    batchsize = 32       # batch size
+    epochs = 10          # number of epochs
+    seed = 0             # set seed > 0 for reproducibility
+    use_cuda = true      # if true use cuda (if available)
+    infotime = 1         # report every `infotime` epochs
+    checktime = 5        # checkpoint interval in epochs; NOTE: not read anywhere in this script
+    dropout = 0.1        # dropout rate handed to every MC layer of the model
+    sample_size = 10     # forward passes per batch at eval; batch repeat factor in training
+end
+
+function train(; kws...)
+    # End-to-end example: build data, model and optimiser from `Args(; kws...)`,
+    # then train for `args.epochs` epochs, reporting test metrics along the way.
+    args = Args(; kws...)
+    if args.seed > 0
+        Random.seed!(args.seed)
+    end
+
+    # Use the GPU only when it is both requested and functional.
+    on_gpu = args.use_cuda && CUDA.functional()
+    device = on_gpu ? gpu : cpu
+    @info (on_gpu ? "Training on GPU" : "Training on CPU")
+
+    ## DATA
+    train_loader, test_loader = get_data(args)
+    @info "Dataset MNIST: $(train_loader.nobs) train and $(test_loader.nobs) test examples"
+
+    ## MODEL AND OPTIMIZER
+    model = device(LeNet5(args))
+    @info "LeNet5 model: $(num_params(model)) trainable params"
+    ps = Flux.params(model)
+
+    # A positive λ adds weight decay, equivalent to L2 regularization.
+    adam = ADAM(args.η)
+    opt = args.λ > 0 ? Optimiser(WeightDecay(args.λ), adam) : adam
+
+    # Log the test-set metrics; the epoch number itself is not used.
+    function report(epoch)
+        @info "Test metrics"
+        return eval_loss_accuracy(args, test_loader, model, device)
+    end
+
+    ## TRAINING
+    @info "Start Training"
+    report(0)
+    nrep = args.sample_size
+    for epoch = 1:args.epochs
+        @showprogress for (x, y) in train_loader
+            # Tile the batch `nrep` times along its last (batch) axis, then move
+            # it to the training device.
+            xs = device(repeat(x, 1, 1, 1, nrep))
+            ys = device(repeat(y, 1, nrep))
+            gs = Flux.gradient(ps) do
+                loss(model(xs), ys)
+            end
+            Flux.Optimise.update!(opt, ps, gs)
+        end
+
+        ## Printing and logging
+        if epoch % args.infotime == 0
+            report(epoch)
+        end
+    end
+end
+
+train()
diff --git a/src/DeepUncertainty.jl b/src/DeepUncertainty.jl
@@ -1,5 +1,10 @@
 module DeepUncertainty
 
-# Write your package code here.
+# Public API: MC dropout layers and uncertainty / calibration metrics
+export MCLayer, MCDense, MCConv
+export mean_loglikelihood, brier_score, ExpectedCalibrationError, prediction_metrics
+
+include("metrics.jl")
+include("layers/mclayers.jl")
 
 end
diff --git a/src/layers/mclayers.jl b/src/layers/mclayers.jl
@@ -0,0 +1,118 @@
+using Flux
+using Random
+using Test
+using Flux: @functor
+
+"""
+    MCLayer(layer, dropout)
+A generic Monte Carlo dropout layer. Takes in any "traditional" flux 
+layer and a function that implements dropout. Performs the usual layer 
+forward pass and then passes the activations through the given dropout function.
+"""
+struct MCLayer{L,F}
+    layer::L      # wrapped layer; called first in the forward pass
+    dropout::F    # callable applied to the layer's output; is passed an `active` keyword
+end
+
+@functor MCLayer
+
+"""
+    MCDense(in, out, dropout_rate, σ=identity; bias=true, init=glorot_uniform)
+    MCDense(layer, dropout_rate)
+
+Creates a traditional dense layer with MC dropout functionality. 
+MC Dropout simply means that dropout is activated in both train and test times 
+
+Reference - Dropout as a bayesian approximation - https://arxiv.org/abs/1506.02142 
+
+The traditional dense layer is a field in the struct MCDense, so all the 
+arguments required for the dense layer can be provided, or the layer can 
+be provided too. The forward pass is the affine transformation of the dense
+layer followed by dropout applied on the resulting activations. 
+
+    y = dropout(σ.(W * x .+ bias), dropout_rate)
+
+# Fields
+- `layer`: A traditional dense layer 
+- `dropout`: A function that implements dropout  
+
+# Arguments 
+- `in::Integer`: Input dimension of features 
+- `out::Integer`: Output dimension of features 
+- `dropout_rate::AbstractFloat`: Dropout rate 
+- `σ::F=identity`: Activation function, defaults to identity
+- `init=glorot_uniform`: Initialization function, defaults to glorot_uniform
+"""
+function MCDense(in::Integer, out::Integer, dropout_rate, σ = identity; kwargs...)
+    # Keyword arguments (e.g. `bias`, `init`) are forwarded untouched to `Flux.Dense`.
+    layer = Flux.Dense(in, out, σ; kwargs...)
+    return MCLayer(layer, (x; k...) -> Flux.dropout(x, dropout_rate; k...))
+end
+
+"""
+    MCConv(filter, in => out, dropout_rate, σ = identity;
+            stride = 1, pad = 0, dilation = 1, groups = 1, [bias, weight, init])
+    MCConv(layer, dropout_rate)
+
+Creates a traditional Conv layer with MC dropout functionality. 
+MC Dropout simply means that dropout is activated in both train and test times 
+
+Reference - Dropout as a bayesian approximation - https://arxiv.org/abs/1506.02142 
+
+The traditional conv layer is a field in the struct MCConv, so all the 
+arguments required for the conv layer can be provided, or the layer can 
+be provided too. The forward pass is the conv operation of the conv
+layer followed by dropout applied on the resulting activations. 
+
+    y = dropout(Conv(x), dropout_rate)
+
+# Fields
+- `layer`: A traditional conv layer 
+- `dropout`: A function that implements dropout
+
+# Arguments 
+- `filter::NTuple{N,Integer}`: Kernel dimensions, eg, (5, 5) 
+- `ch::Pair{<:Integer,<:Integer}`: Input channels => output channels 
+- `dropout_rate::AbstractFloat`: Dropout rate 
+- `σ::F=identity`: Activation function, defaults to identity
+- `init=glorot_uniform`: Initialization function, defaults to glorot_uniform
+"""
+function MCConv(
+    k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, dropout_rate, σ = identity;
+    kwargs...,
+) where {N}
+    # Keyword arguments are handed on to `Flux.Conv` unchanged.
+    conv = Flux.Conv(k, ch, σ; kwargs...)
+    # The closure pins `dropout_rate` and relays any keywords it is called with
+    # (the forward pass supplies `active`) to `Flux.dropout`.
+    drop = (x; kw...) -> Flux.dropout(x, dropout_rate; kw...)
+    return MCLayer(conv, drop)
+end
+
+# Build an MC dropout conv layer from an explicit weight array `w` and `bias`.
+# Keyword arguments (e.g. `stride`, `pad`) are forwarded to `Flux.Conv`; the
+# returned `MCLayer` applies dropout with `dropout_rate` to the conv output.
+function MCConv(
+    w::AbstractArray{T,N}, bias, dropout_rate, σ = identity;
+    kwargs...,
+) where {T,N}
+    layer = Flux.Conv(w, bias, σ; kwargs...)
+    dropout = (x; k...) -> Flux.dropout(x, dropout_rate; k...)
+    return MCLayer(layer, dropout)
+end
+
+"""
+The forward pass of a MC layer: Passes the input through the 
+usual layer first and then through a dropout layer. 
+
+# Arguments 
+- `x`: Input tensors 
+- `dropout=true`: Toggle to control dropout, it's preferred to keep 
+dropout always on, but just in case if it's needed. 
+"""
+function (mc::MCLayer)(x; dropout = true)
+    # Run the wrapped layer, then hand its activations to the stored dropout
+    # function; `dropout` is passed through as that function's `active` keyword.
+    activations = mc.layer(x)
+    return mc.dropout(activations; active = dropout)
+end
diff --git a/src/metrics.jl b/src/metrics.jl
diff --git a/test/layers/mclayers_test.jl b/test/layers/mclayers_test.jl
diff --git a/test/runtests.jl b/test/runtests.jl