---
title: "ballots"
author: "Chris Parrish"
date: "January 9, 2016"
output: pdf_document
---

ballots

references:

- Cannon, et al., Stat2, chapter 04, example 4.3

Import the data.

```{r}
# Read the county-level vote counts; the file is expected in the
# working directory used when knitting.
data <- read.csv("PalmBeach.csv", header = TRUE)
head(data)
dim(data)
```

View the data.

```{r fig.width=6, fig.height=4.2}
# Scatterplot of Buchanan votes against Bush votes, with the
# least-squares line overlaid.
plot(Buchanan ~ Bush, data = data, pch = 20, col = "darkred")
ballots.lm <- lm(Buchanan ~ Bush, data = data)
abline(ballots.lm, col = "orange")
```

Unusual points.

```{r}
# Regression diagnostics for the fitted model; the components used
# below are `hat` (leverages) and `std.res` (standardized residuals).
ballots.diag <- ls.diag(ballots.lm)
summary(ballots.diag)
```

Leverage of point $(x_i, y_i)$.

$$h_i = \frac{1}{n} + \frac{(x_i - \bar{x})^2}{\sum_{j=1}^{n}{(x_j - \bar{x})^2}}$$

Points with high leverage: leverage greater than twice the typical
leverage $2/n$.

```{r}
n <- nrow(data)
typical.leverage <- 2 / n
hi.threshold <- 2 * typical.leverage
data[ballots.diag$hat > hi.threshold, ]
```

Identify unusual points.

```{r fig.width=6, fig.height=8.5}
library(car)
# NOTE(review): newer releases of car appear to take point-identification
# options as id = list(n = 6, labels = data$County) instead of
# id.n / labels -- confirm against the installed version.
scatterplot(Buchanan ~ Bush, data = data, id.n = 6, labels = data$County)
```

Standardized residuals: list the points whose leverage exceeds three
times the typical leverage or whose standardized residual exceeds 2 in
absolute value.

```{r}
hi.threshold <- 3 * typical.leverage
std.res.threshold <- 2

# which() returns the row positions of the flagged points for any
# number of rows, rather than assuming the data has exactly 67.
idx <- which(ballots.diag$hat > hi.threshold |
               abs(ballots.diag$std.res) > std.res.threshold)

# Use the fitted() / resid() accessors rather than partial matching of
# `$fit` / `$resid` against the lm object's component names.
results <- cbind(data[idx, ],
                 fitted(ballots.lm)[idx],
                 resid(ballots.lm)[idx],
                 ballots.diag$std.res[idx])
# The first three names assume the CSV columns are County, Buchanan,
# Bush, in that order.
names(results) <- c("County", "Buchanan", "Bush",
                    "Fits", "Resids", "Std. Resids")
results
```