1. cats

reference:
- ISI, exploration 3.5b, p.203

library(tidyverse)

hypotheses

\[H_0 : \pi = 1/3\] \[H_a : \pi < 1/3\]

observed \(\widehat{p}\)

# Inputs for the theory-based analysis in part 1.
pi.null <- 1 / 3          # proportion stated by the null hypothesis
p.hat.observed <- 0.324   # observed sample proportion
n <- 47000                # sample size used in the standard error

theory-based hypothesis test (HT)

# Standardized statistic: how many standard errors the observed proportion
# lies from the null value.
#
# The null proportion is already available as `pi.null`, so no copy is stored
# in `pi`: assigning to `pi` would mask R's built-in constant of that name.
#
# `se` is computed from the observed proportion.
# NOTE(review): a test statistic built on the null hypothesis would use
# sqrt(pi.null * (1 - pi.null) / n) in the denominator instead -- confirm
# which form is intended before changing the recorded results.
se <- sqrt(p.hat.observed * (1 - p.hat.observed) / n)
z <- (p.hat.observed - pi.null) / se
z
## [1] -4.323544

\(p.value\)

# One-sided p-value: standard normal probability at or below z, matching the
# "less than" alternative hypothesis.
p.value <- pnorm(q = z, lower.tail = TRUE)
print(p.value)
## [1] 7.677135e-06

significance

# Decision rule: reject the null hypothesis when the p-value does not exceed
# the significance level alpha.
alpha <- 0.001
reject.H0 <- alpha >= p.value
print(reject.H0)
## [1] TRUE

99.9% CI for \(\pi\)

# Two-sided interval at confidence level 1 - alpha:
# point estimate minus/plus multiplier * standard error.
point.estimate <- p.hat.observed
# Standard normal quantile that leaves alpha / 2 in each tail.
multiplier <- qnorm(1 - alpha / 2)
ci <- c(
  point.estimate - multiplier * se,
  point.estimate + multiplier * se
)
print(ci)
## [1] 0.3168967 0.3311033

2. sampling distribution of \(\widehat{p}\) if \(\pi_{null} = 1/3\) and \(n = 100\)

# Settings for part 2; these overwrite the part-1 values of the same names.
alpha <- 0.05       # significance level
n <- 100            # number of draws per simulated sample
pi.null <- 1 / 3    # probability of "success" under the null hypothesis

simulation

Design an experiment: “success” means the household has a cat. Draw “success” with probability 1/3 (and “failure” otherwise), repeat the draw 100 times, and report the proportion of successes.

# Simulate one sample of 0/1 draws and return the proportion of 1s.
#
# Each draw is 1 ("success") with probability `pi.success` and 0 otherwise;
# draws are made independently, with replacement.
#
# Args:
#   n.draws:    number of draws in the sample. Defaults to the variable `n`
#               found in the enclosing environment when the function is
#               called, so `cats100()` behaves exactly as before.
#   pi.success: probability of a 1 on each draw. Defaults to the variable
#               `pi.null` found in the enclosing environment at call time.
#
# Returns:
#   A single number in [0, 1]: the mean of the simulated 0/1 draws.
cats100 <- function(n.draws = n, pi.success = pi.null) {
  stopifnot(
    is.numeric(n.draws), length(n.draws) == 1, n.draws >= 1,
    is.numeric(pi.success), length(pi.success) == 1,
    pi.success >= 0, pi.success <= 1
  )
  samp <- sample(
    0:1,
    size = n.draws,
    prob = c(1 - pi.success, pi.success),
    replace = TRUE
  )
  mean(samp)
}

Repeat the experiment 10 times.

# Ten runs of the experiment, one simulated proportion per run.
# No seed is set here, so the values change from run to run.
replicate(n = 10, expr = cats100())
##  [1] 0.32 0.28 0.33 0.34 0.35 0.35 0.32 0.34 0.37 0.41

simulated sampling distribution of \(\widehat{p}\)

Repeat the experiment 1,000 times and display the results.

# Build the simulated null distribution: one p.hat per experiment.
n.experiments <- 1000
df2 <- data.frame(p.hat = replicate(n.experiments, cats100()))
str(df2)
## 'data.frame':    1000 obs. of  1 variable:
##  $ p.hat: num  0.44 0.31 0.27 0.29 0.35 0.3 0.3 0.39 0.26 0.37 ...
# Every simulated p.hat is a multiple of 1 / n, so the bins are given exactly
# that width and centered on those values: one bar per attainable proportion.
# Leaving the bin count at its default of 30 makes ggplot2 print a "pick
# better value with `binwidth`" message and spreads these discrete values
# unevenly across bins.
gg2.null <- ggplot(df2, aes(x = p.hat)) +
  geom_histogram(
    binwidth = 1 / n,
    center = 0,
    color = "saddlebrown",
    fill = "wheat"
  ) +
  labs(title = "Null Distribution of p.hat")
gg2.null