towels

reference:
- Tintle, et al., ISI, exploration 8.2a, p.447

library(tidyverse)
library(reshape2)   # melt and cast

simulation

data

towels <- read.delim("Towels.txt", header = TRUE)
towels
##       X none samerm citizen gender guest
## 1 reuse  113    151     145    127   150
## 2   not  192    155     189    183   190

data in long format

message <- c(rep("none", towels[1, 2] + towels[2, 2]),
             rep("guest", towels[1, 6] + towels[2, 6]),
             rep("samerm", towels[1, 3] + towels[2, 3]),
             rep("citizen", towels[1, 4] + towels[2, 4]),
             rep("gender", towels[1, 5] + towels[2, 5]))
reused <- c(rep("reuse", towels[1, 2]), rep("not", towels[2, 2]),
            rep("reuse", towels[1, 6]), rep("not", towels[2, 6]),
            rep("reuse", towels[1, 3]), rep("not", towels[2, 3]),
            rep("reuse", towels[1, 4]), rep("not", towels[2, 4]),
            rep("reuse", towels[1, 5]), rep("not", towels[2, 5]))
towels.long <- data.frame(message, reused)
towels.levels <- c("none", "guest", "samerm", "citizen", "gender")
towels.long$message <- factor(towels.long$message, 
                              levels = towels.levels)
str(towels.long)
## 'data.frame':    1595 obs. of  2 variables:
##  $ message: Factor w/ 5 levels "none","guest",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ reused : Factor w/ 2 levels "not","reuse": 2 2 2 2 2 2 2 2 2 2 ...

observed statistics

Counts

tbl <- towels.long %>%
  group_by(message, reused) %>%
  summarize(n = n())
tbl
## Source: local data frame [10 x 3]
## Groups: message [?]
## 
## # A tibble: 10 x 3
##    message reused     n
##     <fctr> <fctr> <int>
##  1    none    not   192
##  2    none  reuse   113
##  3   guest    not   190
##  4   guest  reuse   150
##  5  samerm    not   155
##  6  samerm  reuse   151
##  7 citizen    not   189
##  8 citizen  reuse   145
##  9  gender    not   183
## 10  gender  reuse   127

conditional probabilities

p.hat.observed <- c(tbl[2, 3] / (tbl[2, 3] + tbl[1, 3]),
                    tbl[4, 3] / (tbl[4, 3] + tbl[3, 3]),
                    tbl[6, 3] / (tbl[6, 3] + tbl[5, 3]),
                    tbl[8, 3] / (tbl[8, 3] + tbl[7, 3]),
                    tbl[10, 3] / (tbl[10, 3] + tbl[9, 3]))
p.hat.observed <- as.numeric(p.hat.observed)
data.frame(towels.levels, p.hat.observed)
##   towels.levels p.hat.observed
## 1          none      0.3704918
## 2         guest      0.4411765
## 3        samerm      0.4934641
## 4       citizen      0.4341317
## 5        gender      0.4096774

pooled p.hat

tbl2 <- towels.long %>%
  group_by(reused) %>%
  summarize(n = n())
tbl2
## # A tibble: 2 x 2
##   reused     n
##   <fctr> <int>
## 1    not   909
## 2  reuse   686
(p.hat.pooled <- as.numeric(tbl2[2, 2] / (tbl2[2, 2] + tbl2[1, 2])))
## [1] 0.430094

Bar plot.

ggplot(tbl, aes(x = message, reused, y = n, fill = reused)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_fill_manual(values = c("papayawhip", "peru")) +
  labs(y = "p", title = "Towels")