# Chihara and Hesterberg, chap 8
# Chapter 8 Classical Inference : Hypothesis Testing

# =================================
# significance

# Classify a p-value into the conventional significance bands and print
# (and invisibly return) the matching label.
#   p < .001          -> very highly significant
#   .001 <= p < .01   -> highly significant
#   .01  <= p < .05   -> significant
#   p >= .05          -> not statistically significant
significance <- function(p) {
  # Cascade from the smallest threshold up; equivalent to checking each
  # half-open interval explicitly, but with no compound conditions.
  if (p < .001) {
    print("results.very.highly.significant")
  } else if (p < .01) {
    print("results.highly.significant")
  } else if (p < .05) {
    print("results.significant")
  } else {
    print("results.not.statistically.significant")
  }
}

# =================================
# Section 8.1 hypothesis tests for means and proportions
# Example 8.1
# H0 : mu == 515
# H1 : mu > 515

# normal distribution
# NOTE: mean/sd shadow base::mean/base::sd -- kept to match the book's script.
mean <- 515
xBar <- 555
sd <- 116
n <- 25
alpha <- .05
(z <- (xBar - mean) / (sd / sqrt(n)))
# => 1.724138

# p value (upper tail: H1 is mu > 515)
(p <- 1 - pnorm(z))
# => 0.04234147
significance(p)
# => "results.significant"

# t distribution (sample sd instead of the population sd)
s <- 120
(t <- (xBar - mean) / (s / sqrt(n)))
# => 1.666667

# p value
(p <- 1 - pt(t, df=n-1))
# => 0.05429006
significance(p)
# => "results.not.statistically.significant"

# =================================
# Example 8.2
# H0 : mu == 7
# H1 : mu < 7
mean <- 7
xBar <- 6.6
s <- 0.8
n <- 15
alpha <- .05
(t <- (xBar - mean) / (s / sqrt(n)))
# => -1.936492

# p value (lower tail: H1 is mu < 7)
(p <- pt(t, df=n-1))
# => 0.03662922
significance(p)
# => "results.significant"

# =================================
# Example 8.3 a proportion
# X ~ binom(200, 0.13)
# H0 : p == p0 == .13
# H1 : p != p0 == .13
p0 <- .13
n <- 200
k <- 36

# exact binomial tail: P(X >= 36)
(p <- 1 - pbinom(k-1, size=n, prob=p0))
# => 0.02667494

# two-sided test
(p <- 2 * p)
# => 0.05334989
significance(p)
# => "results.not.statistically.significant"

# CLT : normal approximation to the binomial distribution
mean <- p0
var <- p0 * (1 - p0) / n

# continuity correction:
# P(X >= 36) == P(X >= 35.5) == P(pHat >= 35.5 / n)
a <- 35.5 / n
(z <- (a - p0) / sqrt(var))
# => 1.997456

# p value
(p <- 1 - pnorm(z))
# => 0.02288781

# two-sided test
(p <- 2 * p)
# => 0.04577563
significance(p)
# => "results.significant"

# concl : normal and exact methods gave opposite results
# check the usual rule of thumb for the normal approximation
n * p0 > 10
n * (1 - p0) > 10
# yes, but we might use a stricter criterion for more accuracy (see section 4.3.3)
n * p0 > 384
n * (1 - p0) > 384

# =================================
# 8.1.2 comparing two populations
# =================================
# Example 8.4 NC babies
# H0 : mu1 == mu2
# H1 : mu1 > mu2
xBar1 <- 3472
s1 <- 479
n1 <- 898
xBar2 <- 3257
s2 <- 520
n2 <- 111

# Welch two-sample t statistic (unequal variances, unequal sample sizes)
(t <- (xBar1 - xBar2) / sqrt(s1^2 / n1 + s2^2 / n2))
# => 4.144176

# Welch-Satterthwaite approximation to the degrees of freedom
(nu <- (s1^2 / n1 + s2^2 / n2)^2 /
    ((s1^2 / n1)^2 / (n1 - 1) + (s2^2 / n2)^2 / (n2 - 1)))
# => 134.1037

# p value, assuming H0
(p <- 1 - pt(t, df=134))
# => 3.004391e-05
significance(p)
# => "results.very.highly.significant"

# use t.test on the raw data
NCBirths2004 <- read.csv("NCBirths2004.csv")
head(NCBirths2004)
WeightNS <- subset(NCBirths2004, select=Weight, subset=Tobacco=="No", drop=TRUE)
WeightS <- subset(NCBirths2004, select=Weight, subset=Tobacco=="Yes", drop=TRUE)
t.test(WeightNS, WeightS, alternative="greater")

par(mfrow=c(2,2)) # set layout
hist(WeightNS, xlab="Weight (g)", ylab="Frequency", col="royalblue2",
     main = "Nonsmokers")
qqnorm(WeightNS, col="blue", main = "Nonsmokers")
hist(WeightS, xlab="Weight (g)", ylab="Frequency", col="lightcyan4",
     main = "Smokers")
qqnorm(WeightS, col="lightcyan4", main = "Smokers")
par(mfrow=c(1,1)) # reset layout

# =================================
# Example 8.5 beliefs
# H0 : p1 == p2
# H1 : p1 != p2
x1 <- 550
n1 <- 684
x2 <- 425
n2 <- 563
pHat1 <- x1 / n1
pHat2 <- x2 / n2
# pooled proportion under H0, and its standard error
pHat <- (x1 + x2) / (n1 + n2)
se <- sqrt(pHat * (1 - pHat) * (1 / n1 + 1 / n2))
(z <- (pHat1 - pHat2) / se)
# => 2.093984

# p value
(p <- 1 - pnorm(z))

# two-sided test
(p <- 2 * p)
# => 0.03626137
significance(p)
# => "results.significant"

# =================================
# Example 8.6 environmental issues
# H0 : p1 == p2
# H1 : p1 > p2
x1 <- 126
n1 <- 197
x2 <- 223
n2 <- 406
pHat1 <- x1 / n1
pHat2 <- x2 / n2
pHat <- (x1 + x2) / (n1 + n2)
se <- sqrt(pHat * (1 - pHat) * (1 / n1 + 1 / n2))
(z <- (pHat1 - pHat2) / se)
# => 2.10703

# one-sided p value if the proportions are the same
(p <- 1 - pnorm(z))
# => 0.01755747
significance(p)
# => "results.significant"

# =================================
# chi-square test for beliefs
x1 <- 550
n1 <- 684
x2 <- 425
n2 <- 563
# without continuity correction, to match the z test above
prop.test(c(x1, x2), c(n1, n2), correct=FALSE)
# => p == 0.03626137
# 95 percent confidence interval:
# 0.002870906 0.095547134

# =================================
# 8.2 type I and type II errors
# =================================
# Example 8.8 SAT
mu <- 515
cutoff <- 505
sigma <- 116
n <- 100

# P(xBar < cutoff | mu == 515)
#   == P((xBar - mu) / (sigma / sqrt(n)) < (cutoff - mu) / (sigma / sqrt(n)))
#   == P(z < a)
(a <- (cutoff - mu) / (sigma / sqrt(n)))
# => -0.862069
(p <- pnorm(a))
# => 0.1943248
significance(p)
# => "results.not.statistically.significant"

# =================================
# Example 8.9 lotion
# H0 : p == .03
# H1 : p > .03
# X == number of susceptibles ~ binom(50, .03)
n <- 50
p <- .03
(prob <- 1 - pbinom(3, n, p))
# => 0.06275993

# =================================
# Example 8.10 SAT
# alpha == P(reject H0 | H0 is TRUE) == P(xBar <= c | mu == 515)
#       == P(z <= (c - mean) / (sigma / sqrt(n)))
alpha <- .10
mean <- 515
sigma <- 116
n <- 100
(z.a <- qnorm(alpha))
# => -1.281552

# solve z.a == (c - mean) / (sigma / sqrt(n)) for the cutoff c
# (named crit to avoid shadowing base::c)
(crit <- mean + z.a * sigma / sqrt(n))
# => 500.134

# =================================
# Example 8.11 craps
n <- 60
p <- 1/6
ks <- 0:5
rbind(ks, ps=pbinom(ks, size=n, prob=p))
ks <- 15:20
rbind(ks, ps=1-pbinom(ks, size=n, prob=p))
# critical region : 0:4 and 16:60
# P(X <= 4) + P(X >= 16) == 0.02019212 + 0.01644573 == 0.03663785

# =================================
# Example 8.12 analyst
# H0 : theta == 2
# H1 : theta > 2
# Y == # of observation >= .88
# reject H0 if Y >= 5
# P(type I error) == P(Y >= 5 | theta == 2)
# theta == 2 => pdf is f(x) = 3x^2
f <- function(x) {3 * x^2}
# keep the numeric value of the integral (integrate returns a list)
(p <- integrate(f, .88, 1.00)$value)
# => 0.318528 (absolute error < 3.5e-15)

# Y ~ binom(n, p)
# P(Y >= 5 | theta == 2)
n <- 8
(probability <- 1 - pbinom(4, size=n, prob=p))
# => 0.07361352

# =================================
# 8.2.2 type II errors and power
# =================================
# Example 8.13 heights
# H0 : mu == 30
# H1 : mu < 30
mean1 <- 30
sd <- 6
n <- 30
alpha <- .05
mean2 <- 27

# reject H0 if z == (xBar - mean1) / (sd / sqrt(n)) <= z.05
(z.05 <- qnorm(alpha, lower.tail=TRUE))
# => -1.644854

# reject H0 if xBar <= mean1 + z.05 * sd / sqrt(n)
(critical.value <- mean1 + z.05 * sd / sqrt(n))
# => 28.19815

# if the true mean value is mean2, what is the power of this test?
# 1 - beta == P(reject H0 | H1 is TRUE) == P(xBar <= critical.value | mu == mean2)
#   == P((xBar - mean2) / (sd / sqrt(n)) <= (critical.value - mean2) / (sd / sqrt(n)))
#   == P(z <= a)
(a <- (critical.value - mean2) / (sd / sqrt(n)))
# => 1.093759
pnorm(a)
# => 0.8629697

# =================================
# Example 8.14 mice
# mu == mean decrease
# H0 : mu == 0
# H1 : mu > 0
mean0 <- 0
mean1 <- 2
sigma <- 3
n <- 20
alpha <- .05
se <- sigma / sqrt(n)

# reject H0 if z == (xBar - 0) / se >= z.95
# i.e. if xBar >= z.95 * se
(z.95 <- qnorm(1 - alpha))
# => 1.644854
(critical.value <- z.95 * se)
# => 1.103401

# what is the power of this test if the true mean is mean1 == 2?
# 1 - beta == P(reject H0 | H1 is TRUE) == P(xBar >= critical.value | mu == mean1)
#   == P((xBar - mean1) / (sd / sqrt(n)) >= (critical.value - mean1) / se)
#   == P(z >= a)
(a <- (critical.value - mean1) / se)
# => -1.33657
1 - pnorm(a)
# => 0.9093185

# =================================
# Example 8.15 sample size
power <- .95
alpha <- .01
mean1 <- 1.5
(z.99 <- qnorm(1 - alpha))
# => 2.326348

# require (xBar - mean0) / (sigma / sqrt(n)) >= z.99
# require xBar >= mean0 + z.99 * sigma / sqrt(n)
# require power == P(xBar >= mean0 + z.99 * sigma / sqrt(n) | mu == mean1)
# require power == P((xBar - mean1) / (sigma / sqrt(n))
#                    >= (mean0 + z.99 * sigma / sqrt(n) - mean1) / (sigma / sqrt(n)))
# require power == P(z >= z.99 - mean1 / (sigma / sqrt(n)))
(z.05 <- qnorm(1 - power))
# => -1.644854

# require z.05 == z.99 - mean1 / (sigma / sqrt(n))
# require sqrt(n) == (z.99 - z.05) * sigma / mean1
n <- ceiling(((z.99 - z.05) * sigma / mean1)^2); n
# => 64

# =================================
# plot of power
mu0 <- 0
sigma <- 3
n1 <- 50
n2 <- 25
alpha <- .05
(z.95 <- qnorm(1 - alpha))
# => 1.644854

# power of the one-sided z test as a function of the alternative mean x
# (take x == mu1); this rebinds `power` from the numeric above, as in the book
power <- function(x, n){
  1 - pnorm(z.95 - (x - mu0) / (sigma / sqrt(n)))
}
curve(power(x, n1), from=-1.5, to=1.5, col="dark red",
      xlab=expression(paste("Alternative ", mu)), ylab="Power", main="Power")
curve(power(x, n2), from=-1.5, to=1.5, lty=2, col="navyblue", add=TRUE)
legend(x=-1.4, y=.85, c("n=50", "n=25"), lty=c(1,2), lwd=2,
       col=c("dark red", "navyblue"))

# =================================
# 8.3 more on testing
# =================================
# Example 8.16 new drug
# mu == mean weight loss
# H0 : mu == 0
# H1 : mu > 0
mu0 <- 0
xBar <- 4
sd <- 3
n <- 50
(t <- (xBar - mu0) / (sd / sqrt(n)))
# => 9.42809

# p value
(p <- 1 - pt(t, df=n-1))
# => 6.850076e-13

# 95% CI
alpha <- .05
z.975 <- qnorm(1 - alpha/2)
e <- z.975 * sd / sqrt(n)
(ci <- xBar + c(-e, e))
# => 3.168458 4.831542
# concl : we are highly confident that a patient will lose a small amount of weight

# =================================
# 8.4 likelihood ratio tests
# =================================
# Example 8.18 likelihood ratio statistic
# find the .05 quantile of gamma(9, 8)
alpha <- .05
qgamma(alpha, shape=9, rate=8)
# => 0.5869034

# =================================
# Example 8.19 likelihood ratio statistic
# find the .05 quantile of binom(10, .4)
n <- 10
p <- .4
ks <- 0:10
alpha <- .05
plot(ks, pbinom(ks, size=n, prob=p), col="dark red",
     xlab="k", ylab="probability", main="CDF of binom(10, .4)")
abline(h=1 - alpha, lty=2, col="navyblue")
1 - pbinom(6:7, size=n, prob=p)
# => 0.05476188 0.01229455
qbinom(1 - .10, size=n, prob=p)
# => 6
qbinom(1 - .05, size=n, prob=p)
# => 7
qbinom(1 - .01, size=n, prob=p)
# => 8

# =================================
# 8.4.2 generalized likelihood ratio tests
# =================================
# Example 8.20 likelihood ratio statistic
# X ~ N(mu0, sigma=1/n)
mu0 <- 8
# BUG FIX: n must be set BEFORE var <- 1/n; the original computed var with
# the stale n == 10 left over from Example 8.19, which gives a == 7.4799
# instead of the recorded 7.177573.
n <- 4
var <- 1/n
alpha <- .05

# alpha == P(xBar < mu0 - d) => d == z.a * sqrt(var)
z.a <- qnorm(1 - alpha)
(a <- mu0 - z.a * sqrt(var))
# => 7.177573
# in this case, xBar == 6.65 < a, so we reject H0

# =================================