# Faraway, chapter 3
# ======================================
# Economic data set on 50 countries.
library(faraway)
data(savings)
savings

# ======================================
# 3.2 testing examples

# Linear model with all of the predictors.
# H0: beta == 0
# p is very small, so H0 is rejected.
g <- lm(sr ~ pop15 + pop75 + dpi + ddpi, savings)
summary(g)

# Use an F test to perform the same analysis "by hand".
g <- lm(sr ~ pop15 + pop75 + dpi + ddpi, savings)
(tss <- sum((savings$sr - mean(savings$sr))^2))
(rss <- deviance(g))
df.residual(g)
(fstat <- ((tss - rss) / 4) / (rss / df.residual(g)))
1 - pf(fstat, 4, df.residual(g))

# Testing just one predictor:
# can one predictor (pop15) be dropped?
# H0: beta_i == 0
g2 <- lm(sr ~ pop75 + dpi + ddpi, savings)
(rss2 <- deviance(g2))
(fstat <- (deviance(g2) - deviance(g)) / (deviance(g) / df.residual(g)))
1 - pf(fstat, 1, df.residual(g))

# Alternative approach: t test (t^2 equals the 1-df F statistic).
sqrt(fstat)
(tstat <- summary(g)$coef[2, 3])
2 * (1 - pt(sqrt(fstat), 45))

# More convenient way to compare two nested models.
anova(g2, g)

# Testing a pair of predictors
# (can pop75 and ddpi be excluded from the model?).
g3 <- lm(sr ~ pop15 + dpi, savings)
anova(g3, g)

# Testing a subspace.
# H0: beta_i == beta_j (pop15 and pop75 share one coefficient)
g <- lm(sr ~ ., savings)
gr <- lm(sr ~ I(pop15 + pop75) + dpi + ddpi, savings)
anova(gr, g)

# Can one coefficient be set to a specific value?
# H0: beta_ddpi == 0.5 (fixed via an offset term)
gr <- lm(sr ~ pop15 + pop75 + dpi + offset(0.5 * ddpi), savings)
anova(gr, g)

# An equivalent t test (values taken from summary(g)).
(tstat <- (0.409695 - 0.5) / 0.196197)
2 * pt(tstat, 45)
tstat^2

# ======================================
# 3.3 permutation tests

# Calculate the p value for the F statistic of this model.
g <- lm(sr ~ pop75 + dpi, savings)
summary(g)
gs <- summary(g)
gs$fstat

# Run a permutation test and collect the F statistics:
# what proportion was larger than our F statistic?
# Compare that proportion to the original p value.
fstats <- numeric(4000)
for (i in seq_len(4000)) {
  ge <- lm(sample(sr) ~ pop75 + dpi, data = savings)
  fstats[i] <- summary(ge)$fstat[1]
}
# Proportion of permuted F statistics exceeding the observed one.
mean(fstats > 2.6796)

# Permutation test for just one predictor (pop75):
# examine the t value.
summary(g)$coef[2, 3]

# Run a permutation test and collect t values:
# what proportion had larger (absolute) t values?
# Compare that to the original p value.
tstats <- numeric(4000)
for (i in seq_len(4000)) {
  ge <- lm(sr ~ sample(pop75) + dpi, savings)
  tstats[i] <- summary(ge)$coef[2, 3]
}
mean(abs(tstats) > 1.6783)

# ======================================
# 3.4 confidence intervals for beta

# 95% CI for beta_pop75 (by hand, using summary(g) values).
g <- lm(sr ~ ., savings)
qt(0.975, 45)
c(-1.69 - 2.01 * 1.08, -1.69 + 2.01 * 1.08)

# 95% CI for beta_ddpi.
c(0.41 - 2.01 * 0.196, 0.41 + 2.01 * 0.196)

# All univariate CIs.
confint(g)

# Joint 95% confidence region for beta_pop15 and beta_pop75.
library(ellipse)
plot(ellipse(g, c(2, 3)), type = "l", xlim = c(-1, 0), col = "dark red")
points(0, 0, col = "blue")
points(coef(g)[2], coef(g)[3], pch = 18, col = "green")
abline(v = confint(g)[2, ], lty = 2, col = "slateblue3")
abline(h = confint(g)[3, ], lty = 2, col = "slateblue3")

# Examine the correlation of the two predictors.
cor(savings$pop15, savings$pop75)
summary(g, corr = TRUE)$corr

# ======================================
# 3.5 confidence intervals for predictions

data(gala)
g <- lm(Species ~ Area + Elevation + Nearest + Scruz + Adjacent, gala)
x0 <- c(1, 0.08, 93, 6.0, 12.0, 0.34)
(y0 <- sum(x0 * coef(g)))
qt(0.975, 24)
x <- model.matrix(g)
xtxi <- solve(t(x) %*% x)

# Interval for the mean response at x0.
(bm <- sqrt(x0 %*% xtxi %*% x0) * 2.064 * 60.98)
c(y0 - bm, y0 + bm)

# Prediction interval for a new observation (extra "1 +" term).
bm <- sqrt(1 + x0 %*% xtxi %*% x0) * 2.064 * 60.98
c(y0 - bm, y0 + bm)

# Same intervals via predict().
x0 <- data.frame(Area = 0.08, Elevation = 93, Nearest = 6.0,
                 Scruz = 12, Adjacent = 0.34)
str(predict(g, x0, se = TRUE))
predict(g, x0, interval = "confidence")
predict(g, x0, interval = "prediction")

# Confidence band as Nearest varies over a grid.
grid <- seq(0, 100, 1)
p <- predict(g,
             data.frame(Area = 0.08, Elevation = 93, Nearest = grid,
                        Scruz = 12, Adjacent = 0.34),
             se = TRUE, interval = "confidence")
matplot(grid, p$fit, lty = c(1, 2, 2), type = "l",
        xlab = "Nearest", ylab = "Species")
rug(gala$Nearest)

# ======================================
# odor data example
data(odor)
odor

# Inspect X'X for the design matrix (with intercept column).
x <- as.matrix(cbind(1, odor[, -1]))
t(x) %*% x

# Full model, with the coefficient correlations displayed.
g <- lm(odor ~ temp + gas + pack, odor)
summary(g, cor = TRUE)

# Drop temp and refit.
g <- lm(odor ~ gas + pack, odor)
summary(g)

# Back to the savings data: drop pop15 via update().
g <- lm(sr ~ pop15 + pop75 + dpi + ddpi, savings)
summary(g)
g <- update(g, . ~ . - pop15)
summary(g)

# Sequence of progressively smaller models.
g <- lm(sr ~ pop15 + pop75 + dpi + ddpi, savings)
summary(g)
g2 <- lm(sr ~ pop75 + dpi + ddpi, savings)
summary(g2)
g3 <- lm(sr ~ pop75 + ddpi, savings)
summary(g3)
g4 <- lm(sr ~ pop75, savings)
summary(g4)

# Compare the predictions of the four models at one new point.
x0 <- data.frame(pop15 = 32, pop75 = 3, dpi = 700, ddpi = 3)
predict(g, x0)
predict(g2, x0)
predict(g3, x0)
predict(g4, x0)