# Murat Koptur, 24 Ağustos 2018
# Load the awards data set from CSV. The `id` column is skipped on read and
# `prog` is parsed as a factor with the levels "1", "2", "3" in that order.
awards <- read_csv(
  file = "../data/awards.csv",
  col_types = cols(
    id = col_skip(),
    prog = col_factor(levels = c("1", "2", "3"))
  )
)
## # A tibble: 6 x 3
## num_awards prog math
## <int> <fct> <int>
## 1 1 3 41
## 2 1 1 41
## 3 1 3 44
## 4 1 3 42
## 5 1 3 40
## 6 1 1 42
# Reshape to long format for plotting: each measurement ends up in `value`,
# labelled by `variable`. No id/measure columns are passed, so melt() picks
# the id variables itself and reports its choice in a message.
awards_melted <- melt(data = awards)
## Using prog as id variables
## prog variable value
## 1 3 num_awards 1
## 2 1 num_awards 1
## 3 3 num_awards 1
## 4 3 num_awards 1
## 5 3 num_awards 1
## 6 1 num_awards 1
# One panel per melted variable: a histogram with a density curve on top.
# Both layers are rescaled to a maximum of 1 (..ncount.. / ..scaled..) so they
# can share the y axis; panels get independent axis ranges (scales = "free").
# NOTE(review): the `..name..` notation is deprecated in recent ggplot2
# releases in favour of after_stat(); kept as-is so older versions still work.
ggplot(awards_melted, mapping = aes(x = value)) +
  geom_histogram(mapping = aes(y = ..ncount..)) +
  geom_density(mapping = aes(y = ..scaled..)) +
  facet_wrap(facets = ~variable, scales = "free") +
  labs(
    title = "Histograms",
    x = "Values",
    y = "Frequencies"
  )
# Standardise `math` to mean 0 / sd 1 so its coefficient is expressed per
# standard deviation. scale() returns a one-column matrix carrying
# "scaled:center" / "scaled:scale" attributes; as.numeric() reduces it to a
# plain numeric vector so the data frame keeps an ordinary column (a matrix
# column prints as `math[,1]` in a tibble and triggers a type mismatch in
# predict() when new data supply a plain numeric `math`).
awards$math <- as.numeric(scale(awards$math))

# Frequentist baseline: Poisson regression of the award count on the
# standardised math score and the programme factor.
model1 <- glm(
  formula = num_awards ~ math + prog,
  family = poisson,
  data = awards
)
## Call:
## glm(formula = num_awards ~ math + prog, family = poisson, data = awards)
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.96335 -1.14818 -0.01392 0.35710 2.52541
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.48897 0.19620 -2.492 0.0127 *
## math 0.33520 0.07817 4.288 1.8e-05 ***
## prog2 0.45262 0.22475 2.014 0.0440 *
## prog3 0.56172 0.24748 2.270 0.0232 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Dispersion parameter for poisson family taken to be 1)
## Null deviance: 228.83 on 199 degrees of freedom
## Residual deviance: 198.05 on 196 degrees of freedom
## AIC: 496.36
## Number of Fisher Scoring iterations: 5
# Bayesian counterpart of the Poisson regression, fitted by MCMC with
# stan_glm(). normal(0, 10) priors are placed on the coefficients and on the
# intercept.
# A fixed seed is passed so the posterior draws - and every summary, interval
# and plot derived from model2 - are repeatable on the same setup instead of
# changing from run to run.
model2 <- stan_glm(
  formula = num_awards ~ math + prog,
  data = awards,
  family = poisson,
  prior = normal(0, 10),
  prior_intercept = normal(0, 10),
  seed = 20180824
)
## Model Info:
## function: stan_glm
## family: poisson [log]
## formula: num_awards ~ math + prog
## algorithm: sampling
## priors: see help('prior_summary')
## sample: 4000 (posterior sample size)
## observations: 200
## predictors: 4
## Estimates:
## mean sd 2.5% 25% 50% 75% 97.5%
## (Intercept) -0.5 0.2 -0.9 -0.6 -0.5 -0.4 -0.1
## math 0.3 0.1 0.2 0.3 0.3 0.4 0.5
## prog2 0.5 0.2 0.0 0.3 0.5 0.6 0.9
## prog3 0.6 0.3 0.1 0.4 0.6 0.7 1.0
## mean_PPD 1.0 0.1 0.8 0.9 1.0 1.0 1.2
## log-posterior -252.2 1.4 -255.8 -252.9 -251.9 -251.1 -250.4
## Diagnostics:
## mcse Rhat n_eff
## (Intercept) 0.0 1.0 1997
## math 0.0 1.0 2485
## prog2 0.0 1.0 2291
## prog3 0.0 1.0 2054
## mean_PPD 0.0 1.0 3751
## log-posterior 0.0 1.0 1624
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
# 95% posterior uncertainty intervals for the parameters of model2.
posterior_interval(object = model2, prob = 0.95)
## 2.5% 97.5%
## (Intercept) -0.89457959 -0.1447066
## math 0.18111692 0.4915252
## prog2 0.03168288 0.9214785
## prog3 0.07135645 1.0449510
# Posterior density ("areas") plot for model2; prob = 0.95 is forwarded to the
# plotting function to set the interval it highlights.
plot(x = model2, plotfun = "areas", prob = 0.95)