3.6 diagnostics

reference:
- ARM chapter 03, github

library(tidyverse)
library(knitr)
# 3.6 diagnostics
library(rstan)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
library(grid)
library(shinystan)

ARM 3.6

data

### Data
source("kidiq.data.R", echo = TRUE)
## 
## > N <- 434
## 
## > kid_score <- c(65, 98, 85, 83, 115, 98, 69, 106, 102, 
## +     95, 91, 58, 84, 78, 102, 110, 102, 99, 105, 101, 102, 115, 
## +     100, 87, 99, 96, 72,  .... [TRUNCATED] 
## 
## > mom_hs <- c(1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 
## +     1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 
## +     1, 0, 1, 1, 1, 1, 1, 1, 1, .... [TRUNCATED] 
## 
## > mom_iq <- c(121.117528602603, 89.3618817100663, 115.443164881725, 
## +     99.4496394360723, 92.7457099982118, 107.901837758501, 138.893106071162, 
## +  .... [TRUNCATED] 
## 
## > mom_hs_new <- 1
## 
## > mom_iq_new <- 100

model

kidscore_momiq.stan

fit

### Model: kid_score ~ mom_iq
data.list.2 <- c("N", "kid_score", "mom_iq")
kidscore_momiq.sf <- stan(file='kidscore_momiq.stan', data=data.list.2,
                          iter=500, chains=4)
print(kidscore_momiq.sf, pars = c("beta", "sigma", "lp__"))
## Inference for Stan model: kidscore_momiq.
## 4 chains, each with iter=500; warmup=250; thin=1; 
## post-warmup draws per chain=250, total post-warmup draws=1000.
## 
##             mean se_mean   sd     2.5%      25%      50%      75%    97.5%
## beta[1]    26.01    0.29 5.68    14.24    22.45    25.86    29.29    37.68
## beta[2]     0.61    0.00 0.06     0.49     0.58     0.61     0.64     0.72
## sigma      18.32    0.03 0.64    17.18    17.83    18.31    18.74    19.69
## lp__    -1479.36    0.06 1.18 -1482.19 -1479.98 -1479.06 -1478.47 -1477.96
##         n_eff Rhat
## beta[1]   382    1
## beta[2]   333    1
## sigma     456    1
## lp__      368    1
## 
## Samples were drawn using NUTS(diag_e) at Mon Aug 21 04:22:35 2017.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

figure 3.12

### Figure 3.12
beta.post <- extract(kidscore_momiq.sf, "beta")$beta
beta.mean <- colMeans(beta.post)
resid <- kid_score - (beta.mean[1] + beta.mean[2] * mom_iq)
resid.sd <- sd(resid)

p <- ggplot(data.frame(mom_iq, resid), aes(x = mom_iq, y = resid)) +
    geom_point(shape = 20, color = "darkred") +
    geom_hline(yintercept = 0, color = "orange") +
    geom_hline(yintercept = c(-resid.sd, resid.sd), linetype = "dashed", color = "orange") +
    scale_x_continuous("Mother IQ score", breaks = seq(80, 140, 20)) +
    scale_y_continuous("Residuals", breaks = seq(-60, 40, 20))
print(p)