---
title: "Porsche prices"
author: "Chris Parrish"
date: "January 7, 2016"
output: pdf_document
---

Porsche prices

references:

- Cannon, et al., Stat2, chapter 01, examples 1.1-1.5
- Cannon, et al., Stat2, chapter 02
- [Porsche](http://www.porsche.com/)

Import the data.

```{r}
data <- read.csv("PorschePrice.csv", header = TRUE)
head(data, 3)
dim(data)
```

View the data.

```{r}
plot(data$Mileage, data$Price, pch = 20, col = "darkred")
Porsche.lm <- lm(Price ~ Mileage, data = data)
abline(Porsche.lm, col = "orange")
```

Linear model.

$\widehat{price} =$ `r round(coef(Porsche.lm)[1], 3)` $+$ `r round(coef(Porsche.lm)[2], 3)` $mileage$

```{r}
options(show.signif.stars = FALSE)
summary(Porsche.lm)
anova(Porsche.lm)
```

Regression (= residual) standard error.

$\widehat{\sigma_e} = \sqrt{MSE} =$ `r round(sqrt(51.4), 3)`

Residuals.

```{r}
plot(fitted(Porsche.lm), resid(Porsche.lm), pch = 20, col = "darkred")
abline(h = 0, col = "orange", lty = "dashed")
hist(resid(Porsche.lm), col = "wheat")
qqnorm(resid(Porsche.lm), col = "orchid")
qqline(resid(Porsche.lm), col = "orange")
```

Prediction.

```{r}
new.data <- data.frame(Mileage = 50)
predict(Porsche.lm, new.data)
```

T-test for slope of simple linear model.

$t = \frac{\hat{\beta}_1}{SE_{\hat{\beta}_1}}, \quad df=n-2$

```{r}
summary(Porsche.lm)$coefficients
```

CI for slope of simple linear model.

$\hat{\beta}_1 \pm t^* \cdot SE_{\hat{\beta}_1}$

```{r}
# beta.hat and se are copied from the summary(Porsche.lm) coefficient table
beta.hat <- -0.5894009
alpha <- 0.05
n <- 30
t.star <- qt(c(alpha / 2, 1 - alpha / 2), df = n - 2)
se <- 0.05664847
ci <- beta.hat + t.star * se
ci
```

ANOVA test for simple linear regression.

$F = \frac{MSModel}{MSE}, \quad df_1 = 1, \quad df_2 = n-2$

```{r}
anova(Porsche.lm)
```

Coefficient of determination.

$r^2 = \frac{SSModel}{SSTotal}$

```{r}
summary(Porsche.lm)$r.squared
```

Inference for correlation.

$\hat{\beta}_1 = r \cdot \frac{s_Y}{s_X}$

```{r}
r <- with(data, cor(Price, Mileage))
r
r^2
```

T-test for correlation.

$t = \frac{r \sqrt{n - 2}}{\sqrt{1 - r^2}}, \quad df = n-2$

```{r}
t <- r * sqrt(n - 2) / sqrt(1 - r^2)
t
```

CI for simple linear regression response.
$\hat{y} \pm t^* \cdot SE_{\hat{\mu}}$

$SE_{\hat{\mu}} = \hat{\sigma}_\epsilon \sqrt{\frac{1}{n} + \frac{(x^* - \bar{x})^2}{\sum{(x - \bar{x})^2}}}$

```{r}
new.data <- data.frame(Mileage = 50)
predict(Porsche.lm, new.data, interval = "confidence")
```

PI for simple linear regression response.

$\hat{y} \pm t^* \cdot SE_{\hat{y}}$

$SE_{\hat{y}} = \hat{\sigma}_\epsilon \sqrt{1 + \frac{1}{n} + \frac{(x^* - \bar{x})^2}{\sum{(x - \bar{x})^2}}}$

```{r}
new.data <- data.frame(Mileage = 50)
predict(Porsche.lm, new.data, interval = "prediction")
```

Illustration. Function predict.plots from Cannon, et al., R Student Manual (slightly modified).

```{r}
# Scatterplot of y vs x with the fitted regression line plus pointwise
# confidence and prediction bands at the requested confidence level.
#   x          = explanatory variable
#   y          = response variable
#   xlab, ylab = axis labels
#   conf.level = level for both the CI and PI bands (default .95)
# Called for its plotting side effects; returns nothing useful.
predict.plots <- function(x, y, xlab, ylab, conf.level = .95) {
  model <- lm(y ~ x)
  new <- seq(min(x), max(x), length = 101)
  CI <- predict(model, list(x = new), int = "confidence", level = conf.level)
  PI <- predict(model, list(x = new), int = "prediction", level = conf.level)
  # Range over the whole PI matrix (lwr and upr), not just PI[, 3]:
  # otherwise the lower prediction band can fall below ylim and be clipped.
  plot(x, y, ylim = range(y, PI), las = 1, pch = 20, col = "darkred",
       xlab = xlab, ylab = ylab)
  abline(model, col = "orange")  # solid regression line
  points(new, CI[, 2], type = "l", col = 2, lty = 2)
  points(new, CI[, 3], type = "l", col = 2, lty = 2)
  points(new, PI[, 2], type = "l", col = 3, lty = 3)
  points(new, PI[, 3], type = "l", col = 3, lty = 3)
  # Build legend labels from conf.level instead of hard-coding "95%",
  # so non-default levels are labeled correctly.
  lev <- paste0(round(100 * conf.level), "%")
  legend(x = "topright",
         legend = c("Regression", paste(lev, "CI"), paste(lev, "PI")),
         lty = 1:3, col = c("orange", 2, 3))
}
```

```{r}
with(data, predict.plots(Mileage, Price, "Mileage", "Price"))
```