# Efron and Tibshirani, chapter 02 # ===================================================== # interesting references # Efron et al., 1996, Bootstrap confidence levels for phylogeneticâ€‰trees, PNAS # http://www.pnas.org/content/93/23/13429.full # Efron and Tibshirani, 1986, Bootstrap Methods ... # http://profluming.com/Article/UploadFiles/200910/2009100407252456.pdf # ===================================================== # mice # mice survival data, in days, with and without treatment treatment <- c(94, 197, 16, 38, 99, 141, 23) control <- c(52, 104, 146, 10, 51, 30, 40, 27, 46) # beanplot library(beanplot) beanplot(treatment, control, side="b", col=list("yellow", "orange"), xlab="Treatment - Control", main="Mouse Data") legend("topright", bty="n", c("Treatment", "Control"), fill = c("yellow", "orange")) # se of the mean (treatment.mean <- mean(treatment)) (treatment.sd <- sd(treatment)) (treatment.n <- length(treatment)) (treatment.se <- treatment.sd / sqrt(treatment.n)) (control.mean <- mean(control)) (control.sd <- sd(control)) (control.n <- length(control)) (control.se <- control.sd / sqrt(control.n)) # standard error of the difference of the means (diff.se <- sqrt(treatment.se^2 + control.se^2)) # medians (median.treatment <- median(treatment)) (median.control <- median(control)) (median.difference <- median.treatment - median.control) # => 48 # bootstrap estimates of standard error of the mean and median of treatment group, mouse data treatment <- c(94, 197, 16, 38, 99, 141, 23) n <- length(treatment) N <- 50 mean.hat.star <- numeric(N) median.hat.star <- numeric(N) for (i in 1:N){ samp <- sample(1:n, n, replace=TRUE) mean.hat.star[i] <- mean(treatment[samp]) median.hat.star[i] <- median(treatment[samp]) } hist(mean.hat.star, col="dark red") hist(median.hat.star, col="navyblue") sd(mean.hat.star) # => 26.67048 sd(median.hat.star) # => 38.79496 Ns <- c(50, 100, 250, 500, 1000) sd.mean.hat.star <- c(26.67048, 24.3416, 23.60291, 23.2366, 23.29813) sd.median.hat.star <- c(38.79496, 37.91628, 41.08875, 36.81213, 38.18136) plot(sd.mean.hat.star, xaxt="n", ylim=c(15,42), pch=19, col="dark red", xlab="n.boot", ylab="days", main="Estimated standard error of the mean and median") axis(side=1, at=1:5, labels=Ns) points(sd.median.hat.star, pch=19, col="navyblue") legend("bottomleft", legend=c("se.mean", "se.median"), pch=19, col=c("dark red", "navyblue")) # bootstrap estimates of standard error of the median of control group, mouse data control <- c(52, 104, 146, 10, 51, 30, 40, 27, 46) n <- length(control) N <- 100 median.hat.star <- numeric(N) for (i in 1:N){ samp <- sample(1:n, n, replace=TRUE) median.hat.star[i] <- median(control[samp]) } hist(median.hat.star, col="navyblue") sd(median.hat.star) # => 10.92239 # bootstrap estimate of standard error of the difference of medians, treatment and control groups, mouse data # using n.boot == 100 (se.difference <- sqrt(37.91628^2 + 10.92239^2)) # => 39.45812 # observed difference of medians median.difference # => 48 # observed difference / se.difference median.difference / se.difference # => 1.21648 # concl : the observed difference of the medians is only 1.2 se's away from 0, so is not significant