trip

references:
- ISI, exploration 4.2, p.241

library(tidyverse)
library(knitr)

are males evenly distributed between the groups?

data

# Load the participant data from a tab-delimited text file (read.delim's
# default separator) and print the full table for inspection.
trip <- read.delim("randomizing.txt")
trip
##       name    sex height gene. x.var
## 1    Roger   male     68   yes    75
## 2      Sam   male     67   yes    65
## 3     Mary female     66   yes    62
## 4   Barbie female     63   yes    74
## 5     Matt   male     73    no    76
## 6     Paul   male     70    no    69
## 7    Pedro   male     69   yes    71
## 8   Martha female     66   yes    62
## 9    Betty female     63   yes    74
## 10    Russ   male     68   yes    75
## 11    Brad   male     70    no    67
## 12     Bob   male     70    no    67
## 13    Kyle   male     71    no    89
## 14  Alisha female     61    no    77
## 15  Audrey female     67   yes    88
## 16   Alice female     67   yes    88
## 17 Patrick   male     70    no    69
## 18   Mitch   male     73    no    76
## 19   Peter   male     69   yes    71
## 20  Marvin   male     71    no    95
## 21 Michael   male     71    no    95
## 22   Shawn   male     67   yes    65
## 23    Anna female     61    no    77
## 24   Kevin   male     71    no    89

Initial assignment to groups

# Non-random starting assignment: the first 12 rows form group 0 and the
# remaining 12 rows form group 1.
groups <- rep(c(0L, 1L), each = 12)
trip[["group"]] <- groups
str(trip)
## 'data.frame':    24 obs. of  6 variables:
##  $ name  : Factor w/ 24 levels "Alice","Alisha",..: 21 23 13 5 14 18 19 11 6 22 ...
##  $ sex   : Factor w/ 2 levels "female","male": 2 2 1 1 2 2 2 1 1 2 ...
##  $ height: int  68 67 66 63 73 70 69 66 63 68 ...
##  $ gene. : Factor w/ 2 levels "no","yes": 2 2 2 2 1 1 2 2 2 2 ...
##  $ x.var : int  75 65 62 74 76 69 71 62 74 75 ...
##  $ group : int  0 0 0 0 0 0 0 0 0 0 ...

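Report the difference in the proportion of males in the two groups. The table of counts below shows 8 males in each group of 12, so for the initial assignment the difference is \(8/12 - 8/12 = 0\).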

# Count the participants in every sex-by-group cell, then render the counts
# as a formatted table.
tbl <- summarize(group_by(trip, sex, group), n = n())
kable(tbl)
sex group n
female 0 4
female 1 4
male 0 8
male 1 8

random assignment

Randomly assign participants to the two groups, then report the difference in the proportion of males between the two groups.

# Simulate one random re-assignment of participants to groups and measure the
# resulting sex imbalance.
#
# The existing 0/1 group labels are shuffled across the rows of `data`, so the
# group sizes are preserved. The result is the proportion of males in group 0
# minus the proportion of males in group 1.
#
# Arguments:
#   data  data frame with a `sex` column (males coded "male") and a `group`
#         column coded 0/1; defaults to the global `trip`.
# Returns:
#   a single number, p(male | group 0) - p(male | group 1).
males24 <- function(data = trip) {
  stopifnot(
    is.data.frame(data),
    all(c("sex", "group") %in% names(data)),
    all(data$group %in% c(0, 1))
  )

  # Permute the labels by row index. This draws the same random numbers as
  # sample(data$group) but is not subject to sample()'s special case for a
  # length-one numeric vector.
  shuffled <- data$group[sample.int(nrow(data))]
  is.male <- data$sex == "male"

  n.0 <- sum(shuffled == 0)
  n.1 <- sum(shuffled == 1)
  stopifnot(n.0 > 0, n.1 > 0)

  # Counting with sum() yields 0 for an empty cell, so the result is always a
  # length-one number.
  n.male.0 <- sum(is.male & shuffled == 0)
  n.male.1 <- sum(is.male & shuffled == 1)

  # Difference of the two proportions over a common denominator. With equal
  # group sizes this is exactly (n.male.0 - n.male.1) / group size.
  (n.male.0 * n.1 - n.male.1 * n.0) / (n.0 * n.1)
}

Repeat the experiment 10 times.

# Ten independent re-randomizations, each contributing one difference in
# proportions.
replicate(n = 10, expr = males24())
##  [1]  0.1666667 -0.1666667 -0.1666667 -0.1666667  0.1666667  0.0000000
##  [7]  0.0000000  0.1666667  0.0000000  0.1666667

simulated sampling distribution of \(p.diff\)

Repeat the experiment 1,000 times and display the results.

# Collect the statistic from 1,000 re-randomizations into a one-column data
# frame so it can be plotted.
n.experiments <- 1000
df2 <- data.frame(p.diff = replicate(n = n.experiments, expr = males24()))
str(df2)
## 'data.frame':    1000 obs. of  1 variable:
##  $ p.diff: num  -0.333 -0.167 -0.333 -0.333 -0.167 ...

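The most extreme case will have 12 males in one group and 4 in the other (there are 16 males in total), giving \(p.diff = (12 - 4)/12 = 2/3\) in absolute value; the sign depends on which group receives the 12 males.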

# Histogram of the simulated null distribution of p.diff.
# With 16 males split across two groups of 12, p.diff = (m0 - 8) / 6 where m0
# is the number of males in group 0, so every possible value is a multiple of
# 1/6. A bin width of 1/6 gives exactly one bar per attainable value instead
# of the arbitrary default of 30 bins.
ggplot(df2, aes(p.diff)) +
  geom_histogram(binwidth = 1 / 6, color = "saddlebrown", fill = "wheat") +
  labs(title = "Null Distribution of p.diff")