forked from mjskay/when-ish-is-my-bus
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbeta-regression-for-p~known_p.R
executable file
·163 lines (144 loc) · 5.57 KB
/
beta-regression-for-p~known_p.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#######
# Fit beta regression for p ~ known_p.
# See ../survey_analysis.md for an analysis of the results of this model.
#
# The model stored in the repo was not run with the same number of iterations as in
# the paper since that model takes up about 800MB, so we use a smaller number of
# iterations here (the only real difference is slightly choppier looking posteriors,
# the main conclusoins are the same). Set final_model = TRUE to run the model with
# the same parameters as the model reported in the paper.
#######
final_model = FALSE
source("R/load-survey-data.R")
library(tidybayes)
library(metabayes)
library(rstan)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
#MODEL SPEC
m.metastan = metastan(
data = {
n : int(lower=1)
restricted_p : vector(lower=0,upper=1)[n]
known_p : vector[n]
n_viz : int(lower=1)
viz[n] : int(lower=1, upper=n_viz)
n_layout : int(lower=1)
layout[n] : int(lower=1, upper=n_layout)
n_gender : int(lower=1)
gender[n] : int(lower=1, upper=n_gender)
n_participant : int(lower=1)
participant[n] : int(lower=1, upper=n_participant)
n_pquestion : int(lower=1)
pquestion[n] : int(lower=1, upper=n_pquestion)
},
transformed_data = {
layout_coding : vector[2]
gender_coding : vector[2]
logit_known_p : vector[n]
#use simple coding for layout and gender so that the coefficients for them
#correspond to the differences between the two levels, and
#other factors are estimated with respect to the mean of the
#two levels
layout_coding[1] = -0.5
layout_coding[2] = 0.5
gender_coding[1] = -0.5
gender_coding[2] = 0.5
#pre-compute logit to save a bit on inner loop
for (i in 1:n) {
logit_known_p[i] = logit(known_p[i])
}
},
parameters = {
b[2] : vector[n_viz]
d_viz : vector[n_viz]
d_layout : real
d_gender : real
d_participant : vector[n_participant]
d_participant_tau : real
d_viz_participant[n_viz] : vector[n_participant]
d_viz_participant_tau : real
d_pquestion : vector[n_pquestion]
d_pquestion_tau : real
},
model = {
d_participant_sigma : real
d_viz_participant_sigma : real
d_pquestion_sigma : real
mu : vector[n]
phi : vector[n]
alpha : vector[n]
beta : vector[n]
#REGRESSION MODEL
for (i in 1:n) {
#regression submodels
#mean
mu[i] <- inv_logit(b[1,viz[i]] + b[2,viz[i]] * logit_known_p[i])
#dispersion
#negation here makes coefficients represent dispersion (instead of precision) to ease interpretation
#see Smithson (2006)
phi[i] <- exp(-(
d_viz[viz[i]]
+ d_layout * layout_coding[layout[i]]
+ d_gender * gender_coding[gender[i]]
+ d_participant[participant[i]]
+ d_viz_participant[viz[i], participant[i]]
+ d_pquestion[pquestion[i]]
))
#beta-distributed observations
#reparameterized beta distribution in terms of mu (mean) and phi (precision)
#See Smithson M, Verkuilen J (2006). “A Better Lemon Squeezer? Maximum-Likelihood Regression
#with Beta-Distributed Dependent Variables.” Psychological Methods, 11(1), 54–71.
alpha[i] <- mu[i] * phi[i]
beta[i] <- (1 - mu[i]) * phi[i]
}
restricted_p ~ beta(alpha, beta)
#prior is slope 0, intercept 1
#informed priors based on HOPS
b[1] ~ normal(0, 0.4)
b[2] ~ normal(1, 0.3)
#informed priors based on HOPS
d_viz ~ normal(-2.6, 0.6)
#difference in layout is on the order of what we expect for dispersion from HOPS model, centered at 0
d_layout ~ normal(0, 0.6)
#difference in gender is on the order of what we expect for dispersion from HOPS model, centered at 0
d_gender ~ normal(0, 0.6)
#random effect for participant: some participants are better/worse at this task
#informed priors based on HOPS
d_participant_tau ~ gamma(58.23573, 122.9211)
d_participant_sigma <- 1/sqrt(d_participant_tau)
d_participant ~ normal(0, d_participant_sigma)
#random effect for participant*viz: some participants are better/worse at some vizes
d_viz_participant_tau ~ gamma(58.23573, 122.9211)
d_viz_participant_sigma <- 1/sqrt(d_viz_participant_tau)
for (v in 1:n_viz) {
d_viz_participant[v] ~ normal(0, d_viz_participant_sigma)
}
#random effect for pquestion: some questions are harder/easier
d_pquestion_tau ~ gamma(58.23573, 122.9211)
d_pquestion_sigma <- 1/sqrt(d_pquestion_tau)
d_pquestion ~ normal(0, d_pquestion_sigma)
})
m.stan = stan_model(model_code = as.character(m.metastan))
#DATA
data_list = dfl %>%
compose_data() %>%
#drop variables not actually used in the model so stan doesn't complain
{.[intersect(names(.), variables(m.metastan))]}
#RUN MODEL
if (final_model) {
iterations = 20000
thin = 8
chains = 16
} else {
iterations = 10000
thin = 20
chains = 4
}
#RUN THE SAMPLER
fit = sampling(m.stan,
data = data_list, seed = NA,
iter = iterations, thin = thin,
chains = chains)
#SAVE THE RESULTS
save(fit, file = "fits/fit-p~known_p.RData")