lights

reference:
- Tintle, et al., ISI, exploration 8.2b, p.447

library(tidyverse)
library(reshape2)   # melt and cast

simulation

data

(lights1 <- read.delim("NightLight1.txt", header = TRUE))
##      Darkness NightLight RoomLight
## Near       18         78        41
## Not       154        154        34
(lights2 <- read.delim("NightLight2.txt", header = TRUE))
##        Darkness NightLight RoomLight
## Near         18         78        41
## Normal      114        115        22
## Far          40         39        12

data in long format

light <- c(rep("Darkness", 18 + 114 + 40),
           rep("NightLight", 78 + 115 + 39),
           rep("RoomLight", 41 + 22 + 12))
eyesight <- c(rep("Near", 18), rep("Normal", 114), rep("Far", 40),
              rep("Near", 78), rep("Normal", 115), rep("Far", 39),
              rep("Near", 41), rep("Normal", 22), rep("Far", 12))
lights.long <- data.frame(light, eyesight)
lights.long$eyesight <- factor(lights.long$eyesight, 
                               levels = c("Near", "Normal", "Far"))
str(lights.long)
## 'data.frame':    479 obs. of  2 variables:
##  $ light   : Factor w/ 3 levels "Darkness","NightLight",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ eyesight: Factor w/ 3 levels "Near","Normal",..: 1 1 1 1 1 1 1 1 1 1 ...

counts

tbl <- lights.long %>%
  group_by(light, eyesight) %>%
  summarize(n = n())
tbl
## Source: local data frame [9 x 3]
## Groups: light [?]
## 
## # A tibble: 9 x 3
##        light eyesight     n
##       <fctr>   <fctr> <int>
## 1   Darkness     Near    18
## 2   Darkness   Normal   114
## 3   Darkness      Far    40
## 4 NightLight     Near    78
## 5 NightLight   Normal   115
## 6 NightLight      Far    39
## 7  RoomLight     Near    41
## 8  RoomLight   Normal    22
## 9  RoomLight      Far    12

chi-squared test statistic

reshape the data

swing eyesight up into the col position

(tbl.wide <- dcast(tbl, light ~ eyesight, value.var = "n"))
##        light Near Normal Far
## 1   Darkness   18    114  40
## 2 NightLight   78    115  39
## 3  RoomLight   41     22  12
str(tbl.wide)
## 'data.frame':    3 obs. of  4 variables:
##  $ light : Factor w/ 3 levels "Darkness","NightLight",..: 1 2 3
##  $ Near  : int  18 78 41
##  $ Normal: int  114 115 22
##  $ Far   : int  40 39 12

df as array

tbl.array <- rbind(tbl.wide$Near, tbl.wide$Normal, tbl.wide$Far)

chisq observed

chisq.test(tbl.array)
## 
##  Pearson's Chi-squared test
## 
## data:  tbl.array
## X-squared = 56.513, df = 4, p-value = 1.565e-11

Display the test statistic

chisq.test(tbl.array)$statistic
## X-squared 
##  56.51346

observed value of \(X^2\)

(X.sq.observed <- as.numeric(chisq.test(tbl.array)$statistic))
## [1] 56.51346

shuffle

Write a function.

shuffle <- function(){
  df.shuffle <- lights.long
  samp <- sample(df.shuffle$eyesight, size = length(df.shuffle$eyesight), replace = FALSE)
  df.shuffle$eyesight <- samp
  tbl.shuffle <- df.shuffle %>%
    group_by(light, eyesight) %>%
    summarize(n = n())
  tbl.wide <- dcast(tbl.shuffle, light ~ eyesight, value.var = "n")
  tbl.array <- rbind(tbl.wide$Near, tbl.wide$Normal, tbl.wide$Far)
  test.statistic <- as.numeric(chisq.test(tbl.array)$statistic)
  return(test.statistic)
}

shuffle 10 times

replicate(10, shuffle())
##  [1]  2.4671169  4.9920404  2.4635964  3.7323640 10.2544067  0.7126796
##  [7]  0.8442767  5.3969426  3.7527721  3.7080596

sampling distribution of the \(X^2\) statistic

n.experiments <- 1000
df1 <- data.frame(X.sq = replicate(n.experiments, shuffle()))
str(df1)
## 'data.frame':    1000 obs. of  1 variable:
##  $ X.sq: num  1.93 8.85 4.04 1.69 1.79 ...
draw.sampling.distribution(df1)