# 1. cats

reference:
- ISI, exploration 3.5b, p.203

library(tidyverse)

## hypotheses

$H_0 : \pi = 1/3$ versus $H_a : \pi < 1/3$

## observed $$\widehat{p}$$

n <- 47000                # sample size
p.hat.observed <- 0.324   # observed sample proportion
pi.null <- 1 / 3          # value of pi under H_0

## theory-based HT

pi <- 1 / 3  # NOTE(review): masks base::pi and is unused here; kept for any later reader
se <- sqrt(p.hat.observed * (1 - p.hat.observed) / n)  # estimated SE (CI below)
se.null <- sqrt(pi.null * (1 - pi.null) / n)  # SD of p.hat when H_0 is true
z <- (p.hat.observed - pi.null) / se.null  # standardize with the null SD
z
## [1] -4.292319

## $$p.value$$

p.value <- pnorm(z)  # lower-tail area, matching H_a: pi < 1/3
p.value
## [1] 8.840845e-06

## significance

alpha <- 0.001                 # significance level
reject.H0 <- alpha >= p.value  # TRUE when the p-value does not exceed alpha
reject.H0
## [1] TRUE

## 99.9% CI for $$\pi$$

point.estimate <- p.hat.observed
multiplier <- qnorm(1 - alpha/2)  # two-sided critical value for level alpha
ci <- point.estimate + c(-1, 1) * (multiplier * se)  # estimate -/+ margin
ci
## [1] 0.3168967 0.3311033

# 2. sampling distribution of $$\widehat{p}$$ if $$\pi_{null} = 1/3$$ and $$n = 100$$

alpha <- 0.05     # significance level
n <- 100          # draws per simulated sample
pi.null <- 1 / 3  # probability of "success" under the null

## simulation

Design an experiment: “success” means the household has a cat … choose “success” with probability 1/3 … repeat 100 times … report the proportion of successes

#' Simulate one sample and return its proportion of successes.
#'
#' Draws `size` independent 0/1 outcomes, where 1 ("success") occurs with
#' probability `prob`, and returns the proportion of 1s in the sample.
#'
#' @param size Number of draws in the sample. Defaults to the global `n`,
#'   looked up at call time, so a bare `cats100()` behaves as before.
#' @param prob Probability of success on each draw. Defaults to the global
#'   `pi.null`, looked up at call time.
#' @return The sample proportion of successes (a single number in [0, 1]).
cats100 <- function(size = n, prob = pi.null) {
  stopifnot(
    is.numeric(size), length(size) == 1, size >= 1,
    is.numeric(prob), length(prob) == 1, prob >= 0, prob <= 1
  )
  samp <- sample(0:1, size = size, prob = c(1 - prob, prob), replace = TRUE)
  mean(samp)
}

Repeat the experiment 10 times.

replicate(n = 10, expr = cats100())  # ten independent runs of the experiment
##  [1] 0.32 0.28 0.33 0.34 0.35 0.35 0.32 0.34 0.37 0.41

## simulated sampling distribution of $$\widehat{p}$$

Repeat the experiment 1,000 times and display the results.

n.experiments <- 1000  # number of simulated samples
df2 <- data.frame(
  p.hat = vapply(seq_len(n.experiments), function(i) cats100(), numeric(1))
)
str(df2)
## 'data.frame':    1000 obs. of  1 variable:
##  \$ p.hat: num  0.44 0.31 0.27 0.29 0.35 0.3 0.3 0.39 0.26 0.37 ...
gg2.null <- ggplot(df2, aes(x = p.hat)) +
  geom_histogram(
    binwidth = 1 / n,  # p.hat only takes multiples of 1/n: one bin per value
    color = "saddlebrown",
    fill = "wheat"
  ) +
  labs(title = "Null Distribution of p.hat")
gg2.null