# R notes # ================================= # index to these notes # standardized and studentized residuals; # t test for simple linear regression; # sample (Pearson) correlation coefficient, r; Fisher's Z transformation of r # ================================= # Rosner, chapter 11 # ================================= # standardized and studentized residuals # http://www.stat.wisc.edu/courses/st572-larget/Spring2007/handouts04-4.pdf # https://stat.ethz.ch/pipermail/r-help/2011-August/286427.html # http://stat.ethz.ch/R-manual/R-patched/library/MASS/html/studres.html ys.Hat <- a + b * xs birthweight.lm <- lm(birthweight ~ estriol) plot(x=ys.Hat, y=rstandard(birthweight.lm), pch=19, col="dark red", xlab="yHat", ylab="residual", main="standardized residuals") plot(x=ys.Hat, y=rstudent(birthweight.lm), pch=19, col="orange", xlab="yHat", ylab="residual", main="studentized residuals") # ================================= # example 11.15 obstetrics # from example 11.8 # t test for simple linear regression # http://cran.r-project.org/doc/contrib/Faraway-PRA.pdf # http://www.montefiore.ulg.ac.be/~kvansteen/GBIO0009-1/ac20092010/Class8/Using%20R%20for%20linear%20regression.pdf # http://stattrek.com/ap-statistics-4/test-slope.aspx b b.se <- sqrt(S.y.x.squared / L.xx); b.se # test statistic t <- b / b.se; t # critical value alpha <- .05 critical.values <- qt(1 - alpha/2, df=n - 2) * c(-1, 1); critical.values accept.H0 <- critical.values[1] < t & t < critical.values[2]; accept.H0 # p value p <- 2 * (1 - pt(t, df=n-2)); p significance(p) # R : aov birthweight.lm <- lm(birthweight ~ estriol); birthweight.lm summary(birthweight.lm) # ================================= # studentized residuals # ================================= # standardized and studentized residuals # from example 11.3 and 11.5 # http://www.stat.wisc.edu/courses/st572-larget/Spring2007/handouts04-4.pdf # https://stat.ethz.ch/pipermail/r-help/2011-August/286427.html # http://stat.ethz.ch/R-manual/R-patched/library/MASS/html/studres.html estriol <-c(7,9,9,12,14,16,16,14,16,16,17,19,21,24,15,16,17,25,27,15,15,15,16,19,18,17,18,20,22,25,24) birthweight <- c(25,25,25,27,27,27,24,30,30,31,30,31,30,28,32,32,32,32,34,34,34,35,35,34,35,36,37,38,40,39,43) # regression line birthweight.lm <- lm(birthweight ~ estriol) coeffs = coefficients(birthweight.lm); coeffs plot(estriol, birthweight, pch=19, col="dark red", main="Greene-Touchstone study") abline(birthweight.lm, col="orange") xs <- estriol ys <- birthweight n <- length(xs) L.xx <- sum(xs * xs) - sum(xs)^2 / n L.yy <- sum(ys * ys) - sum(ys)^2 / n L.xy <- sum(xs * ys) - sum(xs) * sum(ys) / n b <- L.xy / L.xx; b a <- mean(ys) - b * mean(xs); a SS.total <- L.yy; SS.total SS.reg <- L.xy^2 / L.xx; SS.reg SS.res <- SS.total - SS.reg; SS.res k <- 1 MS.reg <- SS.reg / k; MS.reg MS.res <- SS.res / (n - k - 1); MS.res S.y.x.squared <- MS.res # residuals ys.Hat <- a + b * xs; ys.Hat es.Hat <- ys - ys.Hat; es.Hat xBar <- mean(xs) es.Hat.sd <- sqrt(S.y.x.squared * (1 - 1/n - (xs - xBar)^2 / L.xx)); es.Hat.sd es.studentized.residual <- es.Hat / es.Hat.sd; es.studentized.residual plot(x=ys.Hat, y=es.studentized.residual, pch=19, col="dark red", xlab="yHat", ylab="residual", main="studentized residuals") abline(h=0, col="orange") # standard and studentized residuals in R birthweight.lm <- lm(birthweight ~ estriol) plot(x=ys.Hat, y=rstandard(birthweight.lm), pch=19, col="dark red", xlab="yHat", ylab="residual", main="standardized residuals in R") plot(x=ys.Hat, y=rstudent(birthweight.lm), pch=19, col="orange", xlab="yHat", ylab="residual", main="studentized residuals in R") # ln transformation ln.ys.Hat <- log(ys.Hat) plot(x=ln.ys.Hat, y=es.studentized.residual, pch=19, col="dark red", xlab="ln(yHat)", ylab="residual", main="standardized residuals vs. ln(ys.Hat)") abline(h=0, col="orange") # ================================= # sample (Pearson) correlation coefficient, r r <- L.xy / sqrt(L.xx * L.yy) # ================================= # Fisher's Z transformation of r # note by Venables: # https://stat.ethz.ch/pipermail/r-help/2002-July/023762.html # z0 <- R2Z(rho0) # Z ~ N(mu=z0, var=1 / (n - 3)) # lambda == (z - z0) * sqrt(n - 3) ~ N(0, 1) R2Z <- function(r) { (1 / 2) * log((1+r)/(1-r)) } z <- R2Z(r) # ================================= # example 11.30 Fisher's Z transformation of r r <- .38 z <- R2Z(r); z # ================================= # example 11.31 Fisher's Z transformation of r r <- .38 z <- R2Z(r); z n <- 100 rho0 <- .50 z0 <- R2Z(rho0); z0 z <- R2Z(r); z lambda <- (z - z0) * sqrt(n - 3); lambda # p value p <- 2 * pnorm(lambda); p significance(p) # =================================