smoking

references:
- ISI, example 5.3, p.284

library(tidyverse)
library(knitr)

simulation

data

# Load the raw data from Smoking.csv in the working directory.
# stringsAsFactors = TRUE is spelled out because the read.csv() default
# changed to FALSE in R 4.0.0; with it, text columns are read as factors
# on every R version.
smoking <- read.csv("Smoking.csv", stringsAsFactors = TRUE)
# Compactly display the structure: column names, types and first values.
str(smoking)
## 'data.frame':    4167 obs. of  2 variables:
##  $ Parents: Factor w/ 2 levels "nonsmokers","smokers": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Child  : Factor w/ 2 levels "boy","girl": 2 2 2 2 2 2 2 2 2 2 ...
# Count the rows in every Parents x Child combination; the count is stored
# in column `n`. The result is assigned to `tbl` and then printed.
tbl <- summarize(group_by(smoking, Parents, Child), n = n())
tbl
## # A tibble: 4 x 3
## # Groups:   Parents [?]
##      Parents  Child     n
##       <fctr> <fctr> <int>
## 1 nonsmokers    boy  1975
## 2 nonsmokers   girl  1627
## 3    smokers    boy   255
## 4    smokers   girl   310

Segmented bar plot: for each parental smoking status, the proportion of boys and girls.

# Segmented bar plot built from the pre-computed counts in `tbl`:
# one bar per Parents level, split by Child.
# The stray unnamed `Child` argument that used to sit inside aes() was not
# bound to any aesthetic name and has been removed.
ggplot(tbl, aes(x = Parents, y = n, fill = Child)) +
  # stat = "identity" uses the counts in `n` as bar heights;
  # position = "fill" rescales every bar to height 1, so segments
  # show within-group proportions rather than raw counts.
  geom_bar(stat = "identity", position = "fill") +
  # Colours are assigned to the Child levels in order.
  scale_fill_manual(values = c("mintcream", "turquoise")) +
  # The y axis shows proportions after the "fill" rescaling, so label it
  # explicitly instead of inheriting the column name "n".
  labs(title = "Smoking", y = "Proportion")