# law : bootstrap mean and se of correlation coefficient # Efron and Tibshirani, 1994, An Introduction to the Bootstrap, chapter 6 # ============================================= # import data from csv versions of the files "law.data" and "law82.data" # ===================================== # law LSAT law15 <- read.csv("law15.csv") head(law15) str(law15) summary(law15) # ===================================== # law82 LSAT law82 <- read.csv("law82.csv") head(law82) str(law82) summary(law82) # ===================================== # scatterplots plot(law15\$LSAT, law15\$GPA, xlab="LSAT", ylab="GPA", pch=19, col="dark red", main="Law school data - 15 schools") plot(law82\$LSAT, law82\$GPA, xlab="LSAT", ylab="GPA", pch=19, col="peachpuff4", main="Law school data - 82 schools") # ===================================== # sample correlation coefficient (p.49) # between y == LSAT and z == GPA for law15 cor(law15\$LSAT, law15\$GPA) # => 0.7763745 # ===================================== # calculate the bootstrap estimate of the # standard error of the correlation coefficient bootstrap <- function(x, nboot, theta, ...){ data <- matrix(sample(x, size=length(x) * nboot, replace=TRUE), nrow=nboot) return(apply(data, 1, theta, xdata)) } xdata <- cbind(law15\$LSAT, law15\$GPA) n <- nrow(xdata) theta <- function(x, xdata){ cor(xdata[x, 1], xdata[x, 2]) } (results <- bootstrap(1:n, nboot=20, theta, xdata)) # ?plotmath hist(results, col="skyblue3", main=expression(paste(hat(theta)," for correlation coefficients"))) # ===================================== # se.star : apply bootstrap # calculate the bootstrap estimate of the # standard error of the correlation coefficient # for 8 values of B # abstract bootstrap over B == nboot bootstrapET <- function(B){ sd(bootstrap(1:n, nboot=B, theta, xdata)) } bootstrapET(25) # and apply it to a list of Bs # => list of mean.stars for those Bs Bs <- c(25, 50, 100, 200, 400, 800, 1600, 3200) se.hat <- apply(cbind(Bs), 1, bootstrapET) # => 0.1653918 0.1169143 0.1279248 0.1318564 0.1354017 0.1326723 0.1300091 0.1360726 # compare with table 6.1, p.50 round(cbind(Bs, se.hat), 3) # => Bs se.hat # => [1,] 25 0.111 # => [2,] 50 0.117 # => [3,] 100 0.122 # => [4,] 200 0.118 # => [5,] 400 0.117 # => [6,] 800 0.133 # => [7,] 1600 0.135 # => [8,] 3200 0.134 # ===================================== # histogram of 3200 bootstrap replications of corr.hat.star for law15 results <- bootstrap(1:n, nboot=3200, theta, xdata) hist(results, xlab="bootstrap", col="skyblue3", main=expression(paste(hat(theta)," for correlation coefficients, law15"))) # ===================================== # histogram of 3200 bootstrap replications of corr.hat.star for law82 # compare with figure 6.2, p.51 xdata <- cbind(law82\$LSAT, law82\$GPA) n <- nrow(xdata) results <- bootstrap(1:n, nboot=3200, theta, xdata) hist(results, xlab="bootstrap", col="turquoise1", main=expression(paste(hat(theta)," for correlation coefficients, law82"))) # =====================================