---
title: "car prices"
author: "Chris Parrish"
date: "January 18, 2016"
output: pdf_document
---

car prices

reference:

- Cannon, et al., Stat2, chapter 04, examples 4.7-4.8

Import the data.

```{r}
# Read the data set from the working directory, then show its first rows
# and its dimensions.
data <- read.csv("ThreeCars.csv", header = TRUE)
head(data)
dim(data)
```

Scatterplot matrix.

```{r}
pairs(~ Price + Age + Mileage, data = data, col = "darkred")
```

Average car prices

```{r}
# Only the Porsche and Jaguar indicators enter the model, so the intercept
# is what the text below reports as the BMW average.
prices.lm1 <- lm(Price ~ Porsche + Jaguar, data = data)
```

BMW: `r round(coef(prices.lm1)[1], 3)`

Porsche: `r round(coef(prices.lm1)[1] + coef(prices.lm1)[2], 3)`

Jaguar: `r round(coef(prices.lm1)[1] + coef(prices.lm1)[3], 3)`

```{r}
options(show.signif.stars = FALSE)
summary(prices.lm1)
```

Price vs. Mileage.

```{r}
# Draw empty axes first, then add one colored point layer per make.
plot(Price ~ Mileage, data = data, las = 1, type = "n")
# Select the x/y columns by name so the points always match the
# Price ~ Mileage axes, independent of the column order in the CSV.
points(data[data$Porsche == 1, c("Mileage", "Price")],
       pch = 20, col = "sandybrown")
points(data[data$BMW == 1, c("Mileage", "Price")],
       pch = 20, col = "palegreen")
points(data[data$Jaguar == 1, c("Mileage", "Price")],
       pch = 20, col = "steelblue")
legend(x = "topright", pch = 20, inset = 0.02,
       legend = c("Porsche", "BMW", "Jaguar"),
       col = c("sandybrown", "palegreen", "steelblue"))
```

Multiple regression: same slope, separate intercepts

$$Price \sim Mileage + Porsche + Jaguar$$

```{r}
prices.lm2 <- lm(Price ~ Mileage + Porsche + Jaguar, data = data)
summary(prices.lm2)
```

Illustration.
```{r}
# Scatterplot by make with the fitted lines of prices.lm2 overlaid.
plot(Price ~ Mileage, data = data, las = 1, type = "n")
# Select the x/y columns by name so the points always match the
# Price ~ Mileage axes, independent of the column order in the CSV.
points(data[data$Porsche == 1, c("Mileage", "Price")],
       pch = 20, col = "sandybrown")
points(data[data$BMW == 1, c("Mileage", "Price")],
       pch = 20, col = "palegreen")
points(data[data$Jaguar == 1, c("Mileage", "Price")],
       pch = 20, col = "steelblue")
legend(x = "topright", pch = 20, inset = 0.02,
       legend = c("Porsche", "BMW", "Jaguar"),
       col = c("sandybrown", "palegreen", "steelblue"))

# Coefficient order follows the model formula:
# [1] intercept, [2] Mileage, [3] Porsche, [4] Jaguar.
b.lm2 <- coef(prices.lm2)
# All three lines share the Mileage slope; the Porsche and Jaguar lines
# shift the intercept by their indicator coefficient.
abline(a = b.lm2[1] + b.lm2[3], b = b.lm2[2], lty = 3, col = "sandybrown")
abline(a = b.lm2[1], b = b.lm2[2], lty = 1, col = "palegreen")
abline(a = b.lm2[1] + b.lm2[4], b = b.lm2[2], lty = 2, col = "steelblue")
```

Multiple regression: separate slopes, separate intercepts

$$Price \sim Mileage + Porsche + Jaguar + Porsche \cdot Mileage + Jaguar \cdot Mileage$$

```{r}
prices.lm3 <- lm(Price ~ Mileage + Porsche + Jaguar +
                   Porsche:Mileage + Jaguar:Mileage,
                 data = data)
summary(prices.lm3)
```

Illustration.

```{r}
# Scatterplot by make with the fitted lines of prices.lm3 overlaid.
plot(Price ~ Mileage, data = data, las = 1, type = "n")
points(data[data$Porsche == 1, c("Mileage", "Price")],
       pch = 20, col = "sandybrown")
points(data[data$BMW == 1, c("Mileage", "Price")],
       pch = 20, col = "palegreen")
# The stray backslash before `$` in this selection was a parse error; removed.
points(data[data$Jaguar == 1, c("Mileage", "Price")],
       pch = 20, col = "steelblue")
legend(x = "topright", pch = 20, inset = 0.02,
       legend = c("Porsche", "BMW", "Jaguar"),
       col = c("sandybrown", "palegreen", "steelblue"))

# Coefficient order follows the model formula:
# [1] intercept, [2] Mileage, [3] Porsche, [4] Jaguar,
# [5] Porsche-by-Mileage interaction, [6] Jaguar-by-Mileage interaction.
b.lm3 <- coef(prices.lm3)
# Each non-baseline make adds its indicator coefficient to the intercept
# and its interaction coefficient to the Mileage slope.
abline(a = b.lm3[1] + b.lm3[3], b = b.lm3[2] + b.lm3[5],
       lty = 3, col = "sandybrown")
abline(a = b.lm3[1], b = b.lm3[2], lty = 1, col = "palegreen")
abline(a = b.lm3[1] + b.lm3[4], b = b.lm3[2] + b.lm3[6],
       lty = 2, col = "steelblue")
```

Nested (or incremental) F test.

$$Price \sim Mileage + Porsche + Jaguar$$

$$Price \sim Mileage + Porsche + Jaguar + Porsche \cdot Mileage + Jaguar \cdot Mileage$$

```{r}
# Compare the common-slope model with the separate-slopes model.
anova(prices.lm2, prices.lm3)
```

Residuals.
```{r}
# Studentized residuals against fitted values for the common-slope model.
plot(predict(prices.lm2), rstudent(prices.lm2),  # studentized residuals
     pch = 20, col = "darkred")
abline(h = 0, col = "orange", lty = "dashed")

# Normal quantile plot of the raw residuals with a reference line.
qqnorm(resid(prices.lm2), col = "orchid")  # residuals
qqline(resid(prices.lm2), col = "orange")
```

Prediction.

Exercise: Write a function which will produce one row of the table at a time, and then call it three times to produce the table.

```{r}
# For each make, predict Price at Mileage = 50 from prices.lm2 and keep both
# the confidence interval and the prediction interval.
new.data <- data.frame(Mileage = 50, Porsche = 1, Jaguar = 0, BMW = 0)
porsche.ci <- predict(prices.lm2, new.data, interval = "confidence")
porsche.pi <- predict(prices.lm2, new.data, interval = "prediction")

new.data <- data.frame(Mileage = 50, Porsche = 0, Jaguar = 1, BMW = 0)
jaguar.ci <- predict(prices.lm2, new.data, interval = "confidence")
jaguar.pi <- predict(prices.lm2, new.data, interval = "prediction")

new.data <- data.frame(Mileage = 50, Porsche = 0, Jaguar = 0, BMW = 1)
BMW.ci <- predict(prices.lm2, new.data, interval = "confidence")
BMW.pi <- predict(prices.lm2, new.data, interval = "prediction")

# Each row is fit, CI lower, CI upper, followed by elements 2:3 of the
# prediction-interval result (its lower and upper bounds). Row names are
# set directly in rbind() rather than being assigned and then overwritten.
prices <- rbind(Porsche = c(porsche.ci, porsche.pi[2:3]),
                Jaguar = c(jaguar.ci, jaguar.pi[2:3]),
                BMW = c(BMW.ci, BMW.pi[2:3]))
colnames(prices) <- c("fit", "CI:lwr", "CI:upr", "PI:lwr", "PI:upr")
prices
```