# spbayes_demo.R
# Load required libraries
library(tidyverse)
library(spBayes)
library(sp)
library(gstat)
### Read in example spatial data
data(meuse)
set.seed(1) # fix the RNG so the random train/valid split below is reproducible
data <- meuse %>%
  drop_na() %>%
  mutate(n = row_number()) %>%
  rowwise() %>%
  mutate(group = sample(c("train", "valid"), 1)) %>% # random train/valid split
  ungroup()
head(data)
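# Quick check (optional): the rowwise sample() call gives each site a 50/50
# chance of landing in either group, so expect a roughly even split.
table(data$group)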
### Fit spatial model
fit.spLM <- function(data) {
  spdata <- data.frame(x = data$x, y = data$y, X1 = data$elev, X2 = data$landuse, response = data$copper)
  coordinates(spdata) <- spdata[, c("x", "y")]
  coords <- as.matrix(data[, c("x", "y")])
  colnames(coords) <- c("longitude", "latitude") # labels only; meuse coordinates are projected
  # Prepare to run the spatial model: estimate phi (the spatial decay parameter)
  # from an exponential variogram fitted to the residuals of a non-spatial model.
  fit.nonspLM <- lm(response ~ X1 + X2, spdata) # fit non-spatial model
  n_predictor <- fit.nonspLM$coefficients %>% length() # number of coefficients (incl. intercept)
  z <- resid(fit.nonspLM) # residuals of non-spatial model
  spdata$z <- z
  test.vgm <- gstat::variogram(z ~ 1, data = spdata)
  test.fit <- gstat::fit.variogram(test.vgm, model = gstat::vgm("Exp"))
  phi <- test.fit[test.fit$model == "Exp", ]$range # range of the fitted exponential variogram
  # Set priors: vague priors on beta, on the residual error variance (tau.sq),
  # and on the spatial variance (sigma.sq); a uniform prior on phi whose bounds
  # correspond to effective ranges of phi * 100 (lower) and phi / 100 (upper).
  priors <- list(
    "beta.Norm" = list(rep(0, n_predictor), diag(100, n_predictor)),
    "phi.Unif" = c(-log(0.05) / (phi * 100), -log(0.05) / (phi / 100)),
    "sigma.sq.IG" = c(2, 2), # shape and scale for the inverse-gamma priors
    "tau.sq.IG" = c(2, 0.1)
  )
  # Set starting and tuning values
  starting <- list("phi" = -log(0.05) / phi, "sigma.sq" = 50, "tau.sq" = 1)
  tuning <- list("phi" = (log(0.05) / (phi * 100) - log(0.05) / (phi / 100)) / 10, "sigma.sq" = 0.01, "tau.sq" = 0.01)
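  # Optional sanity check (a sketch): under the exponential covariance the
  # effective range is -log(0.05) / phi, so the phi.Unif bounds back-transform
  # to effective ranges of phi * 100 and phi / 100.
  # -log(0.05) / priors$phi.Unif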
  # Knots for a predictive-process approximation (optional)
  # knots <- kmeans(coords, 20, iter.max = 100)$centers
  # Run spatial model
  splm <- spLM(response ~ X1 + X2,
    data = spdata,
    coords = coords,
    cov.model = "exponential",
    priors = priors,
    tuning = tuning,
    starting = starting,
    n.samples = 10000,
    n.report = 1000 # ,
    # knots = knots
  )
  return(splm)
}
splm <- fit.spLM(data = data %>% filter(group == "train"))
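# Before interpreting the fit, check MCMC mixing; a minimal sketch using the
# coda plot method on the covariance-parameter samples returned by spLM.
plot(splm$p.theta.samples, density = FALSE) # trace plots for sigma.sq, tau.sq, phi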
### Parameter inference
splm.recover <- function(model) {
  # Recover parameter samples, discarding the first half of the run as burn-in.
  splm <- spRecover(model,
    get.beta = TRUE,
    get.w = FALSE,
    start = 5001,
    n.report = 1000
  )
  beta.hat <- splm$p.beta.recover.samples
  # theta.hat <- splm$p.theta.recover.samples
  df_param <- beta.hat %>%
    data.frame() %>%
    mutate(i = row_number()) %>%
    gather(key = "param", value = "value", -i)
  # Plot posterior densities of the regression coefficients
  p_posterior <- df_param %>%
    ggplot() +
    geom_density(aes(x = value)) +
    facet_wrap(. ~ param, scales = "free") +
    theme_classic() +
    geom_vline(xintercept = 0, col = "red")
  # Coefficient summaries: posterior median and 95% credible interval
  df_summary <- df_param %>%
    group_by(param) %>%
    summarise(
      median = median(value),
      lower = quantile(value, 0.025, names = FALSE),
      upper = quantile(value, 0.975, names = FALSE)
    ) %>%
    mutate(sig = case_when(
      lower * upper < 0 ~ "ns", # credible interval straddles zero
      TRUE ~ "*"
    ))
  out <- list(
    p_posterior = p_posterior,
    df_summary = df_summary
  )
  return(out)
}
sp_param <- splm.recover(splm)
sp_param$p_posterior
sp_param$df_summary
### Cross validation
splm.cv <- function(model, data) {
  spdata <- data.frame(x = data$x, y = data$y, X1 = data$elev, X2 = data$landuse, response = data$copper)
  coordinates(spdata) <- spdata[, c("x", "y")]
  coords <- as.matrix(data[, c("x", "y")])
  colnames(coords) <- c("longitude", "latitude")
  # Build the design matrix for the validation data, padding with zero columns
  # for any factor levels present in the training design matrix (model$X) but
  # absent from the validation set, so the columns line up for spPredict.
  mat_design <- model.matrix(response ~ X1 + X2, data = spdata) %>%
    data.frame() %>%
    mutate(n = row_number()) %>%
    gather(key = "predictor", value = "value", -n) %>%
    group_by(n) %>%
    mutate(predictor = fct_expand(predictor, model$X %>%
      data.frame() %>%
      colnames())) %>%
    complete(predictor, fill = list(value = 0)) %>% # complete data frame with new levels
    ungroup() %>%
    spread(key = "predictor", value = "value") %>%
    select(all_of(model$X %>%
      data.frame() %>%
      colnames())) %>%
    as.matrix()
  # Posterior predictive samples at the validation locations
  sp_pred <- spPredict(model,
    pred.covars = mat_design,
    pred.coords = coords,
    start = 5001
  )
  # Summarise each location's predictive distribution
  df_predict <- sp_pred$p.y.predictive.samples %>%
    data.frame() %>%
    mutate(n = row_number()) %>%
    gather(key = "i", value = "value", -n) %>%
    group_by(n) %>%
    summarise(
      median = median(value),
      lower = quantile(value, 0.025, names = FALSE),
      upper = quantile(value, 0.975, names = FALSE)
    ) %>%
    bind_cols(obs = spdata$response)
  # Observed vs. predicted, with 95% prediction intervals
  p_predict <- df_predict %>%
    ggplot() +
    geom_point(aes(x = obs, y = median)) +
    geom_errorbar(aes(x = obs, ymin = lower, ymax = upper)) +
    geom_smooth(aes(x = obs, y = median), method = "lm") +
    ggpubr::stat_cor(aes(x = obs, y = median)) +
    theme_classic() +
    labs(
      x = "Observation",
      y = "Prediction"
    )
  out <- list(
    df_predict = df_predict,
    p_predict = p_predict
  )
  return(out)
}
sp_cv <- splm.cv(splm, data = data %>% filter(group == "valid"))
sp_cv$p_predict
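# Optional sketch: summarise predictive skill with RMSE and the empirical
# coverage of the 95% prediction intervals.
sp_cv$df_predict %>%
  summarise(
    rmse = sqrt(mean((median - obs)^2)),
    coverage_95 = mean(obs >= lower & obs <= upper)
  )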