-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathRF_SpectralParaPred.R
More file actions
164 lines (117 loc) · 5.46 KB
/
RF_SpectralParaPred.R
File metadata and controls
164 lines (117 loc) · 5.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# --------------------------------------------------------------------------- #
# Random Forest Regression for the modelling of (grassland) parameters based on spectral input data.
# An adjusted version of this script was used for the analysis presented in the PFG contribution:
# Schwieder et al. (in review): Estimating grassland parameters from Sentinel-2: A model comparison study.
#
# Author: Marcel Schwieder
# Email: marcel.schwieder@geo.hu-berlin.de
# Date: 02/06/2020
#
# --------------------------------------------------------------------------- #
# load required packages
library(randomForest)
library(raster)
library(ggplot2)
library(gridExtra)
# Define function to split data in training and validation
# adapted from https://gist.github.com/stephenturner/834760
# documentation: https://gettinggeneticsdone.blogspot.com/2011/02/split-data-frame-into-testing-and.html
splitdf <- function(dataframe, seed=NULL) {
if (!is.null(seed)) set.seed(seed)
index <- 1:nrow(dataframe)
trainindex <- sample(index, trunc(length(index)/n))
trainset <- dataframe[trainindex, ]
testset <- dataframe[-trainindex, ]
list(trainset=trainset,testset=testset)
}
### Data import
# Import field data intended for modelling as data frame.
# Tables should follow the following format:
# Rows: Samples (one field observation per line)
# Columns: Spectral bands + response Variable
InputData = read.table("*.csv")
# load raster image and crop to desired extent
RS_rst <- stack("*.TIF")
# crop image to study area
UL <- # upper left coordinate
UR <- # upper right coordinate
LL <- # lower left coordinate
LR <- # lower right coordinate
cropmask <- extent(c(UL, UR, LL, LR))
RS_rst_cropped <- crop(RS_rst, cropmask)
# Convert raster to data frame and re-scale reflectance values if necessary
rstDF <- as.data.frame(RS_rst_cropped)
rstDF <- rstDF/10000
# change the raster band names
clNames <- colnames(InputData[1:9]) # i.e. band names of df
colnames(rstDF) <- clNames
### Initialize data frame to store data, models and evaluation metrics per iteration
# define split of data into training and validation
n <-(10/3)
# empty matrix for storing model performance
RFperformance <- matrix(nrow = 100, ncol = 6)
# statistical measured of the regression
colnames(RFperformance) <- c("R2", "RMSE", "relRMSE_mean", "relRMSE_range", "relRMSE_quant", "expVar")
# empty list for raster prediction
RSTpredictions <- list()
# empty list for storing RF model training and validation data
trainData <- list()
validData <- list()
RFmodel <- list()
### Loop of Random Forest Regression
# If the response variable is in the last column varPred = 0, else numerical value that indicates the position of the variable from the last column
# (e.g. varPred = 1 if the response variable is in the second last column)
varPred <- 0
# Define the last columns that should be considered as prediction variables as seen from the end of the table (e.g., subVar = 1 if all but the last
# column should be used for prediction)
subVar <- 1
for (i in 1:100){
# print current status of the loop and RF model
cat("Starting iteration ", i, "using", tail(names(InputData), 1), "/n")
# split data in training and validation data
splitData <- splitdf(InputData)
valid <- splitData$trainset
train <- splitData$testset
# tune Random Forest Regression
rfrModel <- tuneRF(train[,1:(ncol(train)-subVar)], (train[,ncol(train)-varPred]), ntreeTry =500, stepFactor=1.5, improve=0.02, trace=TRUE, doBest=TRUE, plot=F, importance=TRUE)
# predict RF model
predict.val.rf <- predict(rfrModel, valid[,1:(ncol(valid)-subVar)])
# evaluate RF model
r_square.rf <- cor(valid[,ncol(valid)-varPred], predict.val.rf)^2
RMSE.rf <- sqrt((sum((predict.val.rf-valid[,ncol(valid)-varPred])^2))/length(predict.val.rf))
varExp <- rfrModel$rsq[500]*100
rfrModel$importance
# save model performance
RFperformance[i,1] <- r_square.rf
RFperformance[i,2] <- RMSE.rf
RFperformance[i,3] <- RMSE.rf/mean(valid[,ncol(valid)-varPred])
RFperformance[i,4] <- RMSE.rf/ diff(range(valid[,ncol(valid)-varPred]))
RFperformance[i,5] <- RMSE.rf/(quantile(valid[,ncol(valid)-varPred], 0.75)-quantile(valid[,ncol(valid)-varPred], 0.25))
RFperformance[i,6] <- varExp
# save model parameter
trainData[[i]] <- train
validData[[i]] <- valid
RFmodel[[i]] <- rfrModel
# plot RF Model
plot(rfrModel)
# apply RF model to raster df
predict_RST <- predict(rfrModel, rstDF)
# create raster variable based on one image band
RSTpred <- RS_rst_cropped[[1]]
# assign values to raster
values(RSTpred) <- predict_RST
# save raster prediction
RSTpredictions[[i]] <- RSTpred
}
### Create mean and std RF prediction raster
# stack all raster predictions
predStack <- stack(RSTpredictions)
# compute mean raster prediction
finalRST <- raster::calc(predStack, fun = mean)
# compute standard deviation (sd) of all raster prediction
finalRST_std <- raster::calc(predStack, fun = sd)
# save mean raster
writeRaster(finalRST, filename=paste0("/Raster/", Sensor, "_pred_mean_", varPredName, ".tif"), format="GTiff", overwrite=TRUE)
# save sd (std) raster
writeRaster(finalRST_std, filename=paste0("/Raster/", Sensor, "_pred_std_", varPredName, ".tif"), format="GTiff", overwrite=TRUE)
### end