--- title: "perch" author: "Chris Parrish" date: "January 8, 2016" output: pdf_document --- perch reference: - Cannon, et al., Stat2, chapter 04, examples 4.4, 4.6 Import the data. ```{r} data <- read.csv("Perch.csv", header=TRUE) head(data) dim(data) ``` Scatterplot matrix. ```{r} pairs(~ Weight + Length + Width, data=data, col="darkred") ``` Separate linear models for length and width ```{r} # length plot(Weight ~ Length, data=data, pch=20, col="darkred") perch.length.lm <- lm(Weight ~ Length, data=data) coef(perch.length.lm) # width plot(Weight ~ Width, data=data, pch=20, col="darkred") perch.width.lm <- lm(Weight ~ Width, data=data) coef(perch.width.lm) ``` Multiple regression with interaction. ```{r} perch.lm1 <- lm(Weight ~ Length * Width, data=data) options(show.signif.stars=FALSE) summary(perch.lm1) ``` Multiple regression without interaction. ```{r} perch.lm2 <- lm(Weight ~ Length + Width, data=data) summary(perch.lm2) ``` Residuals. ```{r} # with interaction hist(resid(perch.lm1), col="wheat") qqnorm(resid(perch.lm1), col="orchid") qqline(resid(perch.lm1), col="orange") plot(predict(perch.lm1), resid(perch.lm1), pch=20, col="darkred") abline(h=0, col="orange") # without interaction plot(predict(perch.lm2), resid(perch.lm2), pch=20, col="darkred") abline(h=0, col="orange") ``` Complete second-order model. ```{r} perch.lm3 <- lm(Weight ~ Length + Width + I(Length^2) + I(Width^2) + Length:Width, data=data) summary(perch.lm3) anova(perch.lm3) ``` Correlated predictors. ```{r} with(data, cor(cbind(Weight, Length, Width))) plot(Length ~ Width, data=data, pch=20, col="darkred") perch.lm4 <- lm(Weight ~ Length + Width, data=data) summary(perch.lm4) perch.lm5 <- lm(Weight ~ Length + Width + Length:Width, data=data) summary(perch.lm5) ``` Collinearity. ```{r} cor.matrix <- with(data, cor(cbind(Length, Width, Length^2, Width^2, Length * Width))) var.names <- c("Length", "Width", "Length^2", "Width^2", "Length*Width") rownames(cor.matrix) <- colnames(cor.matrix) <- var.names cor.matrix ``` Nested F-tests. ```{r} # perch.lm5 <- lm(Weight ~ Length + Width + Length:Width, data=data) # perch.lm3 <- lm(Weight ~ Length + Width + I(Length^2) + I(Width^2) + Length:Width, data=data) anova(perch.lm5, perch.lm3) ``` Identify unusual points. ```{r} library(car) scatterplot(Length ~ Width, data=data, id.n=3, labels=data\$Obs) perch.diag <- ls.diag(perch.lm1) # Weight ~ Length * Width, data=data summary(perch.diag) ``` Leverage. ```{r} n <- nrow(data) k <- 3 # k = number of regressors typical.leverage <- (k + 1) / n hi.threshold <- 3 * typical.leverage data[perch.diag\$hat > hi.threshold, ] ``` Standardized residuals. ```{r} hi.threshold <- 3 * typical.leverage std.res.threshold <- 2 idx <- (1:n)[perch.diag\$hat > hi.threshold | abs(perch.diag\$std.res) > std.res.threshold] results <- cbind(data[idx, ], perch.lm1\$fit[idx], perch.diag\$hat[idx], perch.lm1\$resid[idx], perch.diag\$std.res[idx]) names(results) <- c("Obs", "Weight", "Length", "Width", "Fit", "Hat", "Resid", "Std.Res") results ``` Identify unusual points. ```{r fig.width=6, fig.height=8.5} library(car) scatterplot(Length ~ Width, data=data, id.n=3, labels=data\$Obs) ``` Dot plot of \$h_i\$ values. ```{r fig.width=6, fig.height=2.8} stripchart(perch.diag\$hat, pch=20, col="darkred", method="stack", xlab=expression(h[i])) abline(v=hi.threshold, col="orange", lty="dashed") ``` Cook's distance. ```{r} hi.threshold <- 3 * typical.leverage std.res.threshold <- 2 idx <- (1:n)[perch.diag\$hat > hi.threshold | abs(perch.diag\$std.res) > std.res.threshold] results <- cbind(data[idx, ], perch.diag\$std.res[idx], perch.diag\$hat[idx], perch.diag\$cooks[idx]) names(results) <- c("Obs", "Weight", "Length", "Width", "Std.Res", "Hat", "Cook's D") results plot(perch.diag\$hat, perch.diag\$std.res, pch=20, col="darkred", ylim=c(-4.5,4.5), xlab="Leverage", ylab="Standardized residuals") abline(h=c(-3, -2, 2, 3), col="orange", lty="dashed") abline(v=c(-3, -2, 2, 3) * typical.leverage, col="orange", lty="dashed") cook <- function(pm, D, h){ k <- 3 # number of predictors val <- pm * sqrt(D * (k + 1) * (1 - h) / h) return(val) } curve(cook(1, 1, x), from=0.05, to=0.45, col="orange", lty="dashed", add=TRUE) curve(cook(1, 0.5, x), from=0.05, to=0.45, col="orange", lty="dashed", add=TRUE) curve(cook(-1, 0.5, x), from=0.05, to=0.45, col="orange", lty="dashed", add=TRUE) curve(cook(-1, 1, x), from=0.05, to=0.45, col="orange", lty="dashed", add=TRUE) text(x=perch.diag\$hat[40], perch.diag\$std.res[40], labels=c("40"), pos=2) text(x=perch.diag\$hat[52], perch.diag\$std.res[52], labels=c("52"), pos=2) text(x=perch.diag\$hat[55], perch.diag\$std.res[55], labels=c("55"), pos=2) ```