---
title: "medical school"
author: "Chris Parrish"
date: "January 23, 2016"
output: pdf_document
---

medical school

reference:

- Cannon, et al., Stat2, chapter 09, example 9.4-9.5, 9.11-9.12
- Cannon, et al., Student R Manual, chapter 9

Import the data.

```{r}
# MedGPA.csv is read relative to the working directory of the knit.
data <- read.csv("MedGPA.csv", header = TRUE)
head(data, 4)
dim(data)
```

Scatterplot matrix.

```{r}
# Pairwise scatterplots of the response and the candidate predictors.
pairs(~ Acceptance + GPA + MCAT + Sex, data = data, col = "darkred")
```

Simple regression.

Note: Coefficients of the regression line do not agree with the values shown on p.458.

```{r}
# Least-squares line of MCAT on GPA, drawn over the scatterplot.
plot(MCAT ~ GPA, data = data, pch = 20, col = "darkred")
MCAT.lm <- lm(MCAT ~ GPA, data = data)
abline(MCAT.lm, col = "orange", lty = 2)

# Suppress significance stars in this and all later model summaries.
options(show.signif.stars = FALSE)
summary(MCAT.lm)
```

Logistic regression with glm

reference: Cannon, et al., Student R Manual, chapter 9

```{r}
library(boot)  # provides inv.logit()

# Acceptance is plotted with a small vertical jitter so that overlapping
# points at the same GPA remain visible.
with(data, plot(GPA, jitter(Acceptance, amount = .05),
                pch = 20, col = "darkred", ylab = "Acceptance"))

# NOTE: despite its name, MCAT.glm models Acceptance as a function of GPA;
# MCAT is not in the model. The name is kept so later code that refers to
# it keeps working.
MCAT.glm <- glm(Acceptance ~ GPA, data = data, family = binomial)
b0 <- coef(MCAT.glm)[1]  # intercept
b1 <- coef(MCAT.glm)[2]  # slope for GPA (log-odds scale)

# Overlay the fitted probability curve on the jittered data.
curve(inv.logit(b0 + b1 * x), col = "darkred", add = TRUE)
summary(MCAT.glm)
```

Prediction.

```{r}
# Fitted probability of acceptance for an applicant with GPA = 3.6.
new.data <- data.frame(GPA = 3.6)
predict(MCAT.glm, new.data, type = "response")
```

Odds ratio.

$\beta_1 = \log(OR)$

$\exp(\beta_1) = OR$

A one unit increase in GPA (for instance, from 3.0 to 4.0) multiplies the odds of acceptance by a factor of 233! A one-tenth unit increase in GPA (for instance, from 3.0 to 3.1) multiplies the odds of acceptance by a factor of 1.73.

```{r}
coef(MCAT.glm)
beta1 <- coef(MCAT.glm)[2]

# Odds ratio for a one-unit increase in GPA.
OR <- exp(beta1)
OR

# Odds ratio for a one-tenth-unit increase in GPA (OR is overwritten).
OR <- exp(0.1 * beta1)
OR
```

Slicing.

reference: Cannon, et al., Student R Manual, chapter 9

Compare with Figure 9.18, p.473.
```{r}
# Empirical-logit plot: sort the cases by GPA, cut them into equal-sized
# slices, and plot the adjusted log-odds of acceptance in each slice
# against the slice's mean GPA.
sorted.MedGPA.df <- data[order(data$GPA), ]
x <- sorted.MedGPA.df$GPA
y <- sorted.MedGPA.df$Acceptance

# Slice size is derived from the data instead of being hard-coded, and the
# guard fails loudly if the rows cannot be split evenly (matrix() would
# otherwise recycle or truncate values).
n.slices <- 5
slice.size <- nrow(sorted.MedGPA.df) %/% n.slices
stopifnot(n.slices * slice.size == nrow(sorted.MedGPA.df))

# byrow = TRUE puts consecutive (sorted) cases in the same row, so each
# row of the matrix is one slice.
x.mat <- matrix(x, nrow = n.slices, ncol = slice.size, byrow = TRUE)
x.means <- rowMeans(x.mat)
y.mat <- matrix(y, nrow = n.slices, ncol = slice.size, byrow = TRUE)
y.yes <- rowSums(y.mat)          # acceptances per slice
y.no <- slice.size - y.yes       # rejections per slice

y.prop <- y.yes / (y.yes + y.no)
# Adjusted proportion: adding .5 / 1 keeps the logit finite when a slice
# is all 0s or all 1s.
y.prop.adj <- (.5 + y.yes) / (1 + y.yes + y.no)
y.logit.adj <- log(y.prop.adj / (1 - y.prop.adj))

plot(x.means, y.logit.adj, pch = 20, col = "darkred",
     xlab = "GPA", ylab = "adjusted logit")
abline(lm(y.logit.adj ~ x.means), col = "orange")
```

Formal inference: tests and intervals.

```{r}
data <- read.csv("MedGPA.csv", header = TRUE)

# GPA10 is GPA in tenths of a point, so a one-unit change in GPA10 is a
# 0.1 change in GPA.
data$GPA10 <- data$GPA * 10
med.school.glm <- glm(Acceptance ~ GPA10, data = data, family = binomial)
summary(med.school.glm)
```

CI for odds ratio.

$\beta_1 = \log(OR)$

$\exp(\beta_1) = OR$

```{r}
# Compute the intervals once and reuse them on both scales.
med.school.ci <- confint(med.school.glm)
med.school.ci        # CI for beta1 (GPA10 row)
exp(med.school.ci)   # CI for exp(beta1) = OR (GPA10 row)
```