---
title: "NFL"
author: "Chris Parrish"
date: "January 8, 2016"
output: pdf_document
---

NFL

reference:

- Cannon, et al., Stat2, chapter 03, examples 3.1-3.7

Import the data.

```{r}
data <- read.csv("NFL2007Standings.csv", header = TRUE)
head(data)
dim(data)
```

Scatterplot matrix.

```{r}
pairs(~ WinPct + PointsFor + PointsAgainst, data = data,
      pch = 20, col = "darkred")
```

Multiple regression.

```{r}
NFL.lm <- lm(WinPct ~ PointsFor + PointsAgainst, data = data)
# Observed vs. fitted values; the orange line is y = x (a perfect fit).
plot(predict(NFL.lm), data$WinPct, pch = 20, col = "darkred")
abline(a = 0, b = 1, col = "orange")
```

Linear model.

$\widehat{WinPct} =$ `r round(coef(NFL.lm)[1], 3)` + `r round(coef(NFL.lm)[2], 3)` $PointsFor$ + `r round(coef(NFL.lm)[3], 3)` $PointsAgainst$

```{r}
options(show.signif.stars = FALSE)
summary(NFL.lm)
confint(NFL.lm)
anova(NFL.lm)
```

Standard error of the multiple regression model.

$\hat{\sigma}_\epsilon = \sqrt{MSE}$

```{r}
# Read MSE from the ANOVA table rather than retyping a rounded value,
# so the result stays in sync with the fitted model.
NFL.aov <- anova(NFL.lm)
MSE <- NFL.aov["Residuals", "Mean Sq"]
sqrt(MSE)
```

Coefficient of multiple determination.

$R^2 = \frac{SSModel}{SSTotal}$

```{r}
# Recompute the ANOVA table here so this chunk does not depend on the
# previous one; the sequential sums of squares of the two predictors
# add up to SSModel.
NFL.aov <- anova(NFL.lm)
SSModel <- sum(NFL.aov[c("PointsFor", "PointsAgainst"), "Sum Sq"])
SSE <- NFL.aov["Residuals", "Sum Sq"]
SSTotal <- SSModel + SSE
R.sq <- SSModel / SSTotal
R.sq
```

Adjusted coefficient of multiple determination.

$R_{Adj}^2 = 1 - \frac{SSE / (n - k - 1)}{SSTotal / (n - 1)}$

```{r}
n <- nobs(NFL.lm)              # observations actually used in the fit
k <- length(coef(NFL.lm)) - 1  # predictors, excluding the intercept
Adj.R.sq <- 1 - (SSE / (n - k - 1)) / (SSTotal / (n - 1))
Adj.R.sq
```

Correlation of $y$ and $\hat{y}$

```{r}
y <- data$WinPct
y.hat <- predict(NFL.lm)
r <- cor(y, y.hat)
r
r^2
```

Residuals.

```{r}
# Residuals vs. fitted values, with a reference line at zero.
plot(predict(NFL.lm), resid(NFL.lm), pch = 20, col = "darkred")
abline(h = 0, col = "orange", lty = "dashed")
# Distribution of the residuals and a normal quantile plot.
hist(resid(NFL.lm), col = "wheat")
qqnorm(resid(NFL.lm), col = "orchid")
qqline(resid(NFL.lm), col = "orange")
```

Effect plots.

```{r message=FALSE, fig.width=6, fig.height=4.0}
library(alr4)
plot(effect("PointsFor", NFL.lm))
plot(effect("PointsAgainst", NFL.lm))
```

Prediction.

```{r}
# Pittsburgh Steelers, 2007
# NOTE(review): the 2007 Steelers' points-against may have been 269 rather
# than 260 -- verify against the corresponding row of the data.
new.data <- data.frame(PointsFor = 393, PointsAgainst = 260)
(y.hat <- predict(NFL.lm, new.data))
y <- 0.625  # observed winning percentage used for the residual
(residual <- y - y.hat)
```

CI and PI.

```{r}
# Hypothetical team: 400 points scored, 350 points allowed.
new.data <- data.frame(PointsFor = 400, PointsAgainst = 350)

# Interval estimates from the fitted model at these predictor values:
# one requested with interval = "confidence", one with "prediction".
conf <- predict(NFL.lm, newdata = new.data, interval = "confidence")
pred <- predict(NFL.lm, newdata = new.data, interval = "prediction")

# Stack both intervals in one labelled table for side-by-side comparison.
intervals <- rbind(conf, pred)
rownames(intervals) <- c("CI", "PI")
intervals
```