---
title: "doctors"
author: "Chris Parrish"
date: "January 7, 2016"
output: pdf_document
---

doctors

references:

- Cannon, et al., Stat2, chapter 01, example 1.6

Import the data.

```{r}
# Read the data set; the CSV must be in the knitting directory.
data <- read.csv("MetroHealth83.csv", header = TRUE)
head(data[, c(1, 2, 4)])
dim(data)
```

View the data.

```{r fig.width=6, fig.height=4.2}
# Scatterplot of the raw data with the least-squares line overlaid.
plot(data$NumHospitals, data$NumMDs, pch = 20, col = "darkred")
doctors.lm <- lm(NumMDs ~ NumHospitals, data = data)
abline(doctors.lm, col = "orange")
```

Linear model.

$\widehat{NumMDs} =$ `r round(coef(doctors.lm)[1], 3)` + `r round(coef(doctors.lm)[2], 3)` $NumHospitals$

```{r}
# Suppress significance stars in the printed coefficient table.
options(show.signif.stars = FALSE)
summary(doctors.lm)
anova(doctors.lm)
```

Regression (= residual) standard error.

$\widehat{\sigma_e} = \sqrt{MSE} =$ `r round(sqrt(anova(doctors.lm)["Residuals", "Mean Sq"]), 3)`

Residuals.

```{r}
# Residuals vs. fitted values, then a histogram and a normal Q-Q plot
# of the residuals of the untransformed model.
plot(fitted(doctors.lm), resid(doctors.lm), pch = 20, col = "darkred")
abline(h = 0, col = "orange", lty = "dashed")
hist(resid(doctors.lm), col = "wheat")
qqnorm(resid(doctors.lm), col = "orchid")
qqline(resid(doctors.lm), col = "orange")
```

Transformation.

```{r fig.width=6, fig.height=4.2}
# Refit with a square-root transformation of the response.
plot(data$NumHospitals, sqrt(data$NumMDs), pch = 20, col = "darkred")
doctors.lm2 <- lm(sqrt(NumMDs) ~ NumHospitals, data = data)
abline(doctors.lm2, col = "orange")
```

Residuals.

```{r}
# Same residual diagnostics, now for the transformed model.
plot(fitted(doctors.lm2), resid(doctors.lm2), pch = 20, col = "darkred")
abline(h = 0, col = "orange", lty = "dashed")
doctors.lm2
hist(resid(doctors.lm2), col = "wheat")
qqnorm(resid(doctors.lm2), col = "orchid")
qqline(resid(doctors.lm2), col = "orange")
```

Prediction.

```{r}
# predict() returns a value on the sqrt scale; square it to get back to
# the original NumMDs scale.
new.data <- data.frame(NumHospitals = 18)
y.hat <- predict(doctors.lm2, new.data)
NumMDs.hat <- y.hat^2
NumMDs.hat
```

New linear model.

$\widehat{NumMDs} =$ $(`r round(coef(doctors.lm2)[1], 3)` + `r round(coef(doctors.lm2)[2], 3)` \cdot NumHospitals)^2$

Illustration.

```{r}
# Back-transformed fitted curve on the original scale.
# The coefficients are read from doctors.lm2 rather than typed in by hand,
# so the curve always matches the fitted model.
# Note: this function replaces the numeric y.hat created in the
# prediction chunk above.
y.hat <- function(x) {
  a <- coef(doctors.lm2)[[1]]
  b <- coef(doctors.lm2)[[2]]
  (a + b * x)^2
}
plot(data$NumHospitals, data$NumMDs, pch = 20, col = "darkred")
curve(y.hat, from = 2, to = 30, col = "olivedrab", add = TRUE)
```