# Rosner, chapter 3 # ================================= # See: R Tutorial : Numerical Measures # http://www.r-tutor.com/elementary-statistics/numerical-measures # See: Quick-R : Descriptive Statistics # http://www.statmethods.net/stats/descriptives.html # ================================= # example 3.19 -- breast cancer # B == diagnosed with breast cancer within 2 years # Aplus == positive mammogram # Aminus == negative mammogram n.Aneg <- 100000 p.B.given.Aneg <- 20/n.Aneg p.B.given.Apos <- 1/10 # relative risk rr <- p.B.given.Apos/p.B.given.Aneg rr # ================================= # example 3.21 -- breast cancer in general population # B == diagnosed with breast cancer within 2 years # Aplus == positive mammogram # Aminus == negative mammogram p.Apos <- .07 p.Aneg <- 1 - p.Apos p.B.given.Aneg <- 20/n.Aneg p.B.given.Apos <- 1/10 # law of total probability p.B <- p.B.given.Aneg * p.Aneg + p.B.given.Apos * p.Apos p.B # ================================= # example 3.22 -- ophthalmology cataract.ages <- c(60, 65, 70, 75) cataract.relFrequency <- c(.45, .28, .20, .07) cataract.cataractIncidence <- c(.024, .046, .088, .153) cataract <- rbind(cataract.ages, cataract.cataractIncidence) cataract plot(x=cataract.ages, y=cataract.cataractIncidence, pch=19, col="deepskyblue", xlab="age group", ylab="cataract incidence", main="Cataract incidence") # B == people who will develop cataract in next 5 years # P(B) pB <- sum(cataract.relFrequency * cataract.cataractIncidence) pB # |B| n <- 5000 ceiling(n * pB) # ================================= # example 3.23 -- predictive value PVpos <- p.B.given.Apos PVneg <- 1 - p.B.given.Aneg # ================================= # example 3.24 -- cancer # sensitivity, specificity # false positive, false negative # sensitivity == P(test or symptom | disease) # specificity == P(no test or symptom | no disease) p.smoker.given.cancer <- .90 p.smoker.given.nocancer <- .30 sensitivity <- p.smoker.given.cancer sensitivity specificity <- 1 - p.smoker.given.nocancer specificity # ================================= # example 3.25 -- cancer p.history.given.cancer <- .05 p.history.given.nocancer <- .02 sensitivity <- p.history.given.cancer sensitivity specificity <- 1 - p.history.given.nocancer specificity # ================================= # Bayes PV BayesPVpos <- function(sensitivity, specificity, prevelence){ (sensitivity * prevelence)/ (sensitivity * prevelence + (1-specificity) * (1 - prevelence)) } BayesPVneg <- function(sensitivity, specificity, prevelence){ (specificity * (1 - prevelence))/ (specificity * (1 - prevelence) + (1-sensitivity) * prevelence) } # ================================= # example 3.26 -- hypertension sensitivity <- .84 specificity <- 1 - .23 prevelence <-.20 BayesPVpos(sensitivity, specificity, prevelence) BayesPVneg(sensitivity, specificity, prevelence) # ================================= # Bayes Bayes <- function(i){ p.A.given.Bi[i] * p.Bi[i] / sum(p.A.given.Bi * p.Bi) } # ================================= # example 3.27 -- pulmonary disease, no smoking # A == chronic cough, results of lung biopsy # B == B1 normal; B2 lung cancer; B3 sarcoidosis p.A.given.Bi <- c(.001, .9, .9) p.Bi <- c(.99, .001, .009) p.B1.given.A <- Bayes(1) p.B1.given.A p.B2.given.A <- Bayes(2) p.B2.given.A p.B3.given.A <- Bayes(3) p.B3.given.A # ================================= # example 3.28 -- pulmonary disease, smoking # A == chronic cough, results of lung biopsy # B == B1 normal; B2 lung cancer; B3 sarcoidosis p.A.given.Bi <- c(.001, .9, .9) p.Bi <- c(.98, .015, .005) p.B1.given.A <- Bayes(1) p.B1.given.A p.B2.given.A <- Bayes(2) p.B2.given.A p.B3.given.A <- Bayes(3) p.B3.given.A # ================================= # example 3.33 -- ROC curve, radiology sensitivity <- c(1.0, .94, .90, .86, .65, 0) specificity <- c(0, .57, .67, .78, .97, 1.0) plot(x=(1-specificity), y=sensitivity, pch=19, col="dark red", type="b", main="ROC curve") # ================================= # example 3.34 -- area under ROC curve, radiology left <- c(.94, .90, .86, .65, 0) right <- c(1.0, .94, .90, .86, .65) base <- c(.57, .67, .78, .97, 1.0) - c(0, .57, .67, .78, .97) simpson <- sum(.5 * (left + right) * base) # ================================= # example 3.35 -- prevalence # prevalence == P(currently having the disease) == n.disease / n.population prevalence <- .203 # ================================= # example 3.36 -- cumulative incidence (over a specified period of time) # cumulative incidence == P(person currently without the disease will develop the disease within the specified time period) incidence <- 118.4/100000 # probability of developing breast cancer over the next year