### The dataset is "America's Best Small Companies"
### http://www.stat.sc.edu/webstat/version2.0/data/ceo.dat
### bootstrap
### First create a dir C:\Stat6601\ and then save the data file ceo.txt
### there from the website.

### load the data
# scan() reads the numeric values from ceo.txt; they are arranged row by row
# into a two-column matrix (column 1 = AGE, column 2 = SALARY).
CEO <- matrix(scan("C:\\Stat6601\\ceo.txt"), ncol = 2, byrow = TRUE)
AGE <- CEO[, 1]     # assigns the first column to AGE vector
SALARY <- CEO[, 2]  # assigns the second column to SALARY vector

### plot the AGE data
hist(AGE, probability = TRUE)     # histogram on the density scale
plot(density(AGE), type = "l")    # kernel density plot

### summarize the AGE data
mean.AGE <- mean(AGE)
mean.AGE
sd.AGE <- sd(AGE)                 # sample standard deviation, sqrt(var(AGE))
sd.AGE
n <- length(AGE)
n
se.AGE <- sd.AGE / sqrt(n)        # standard error of the sample mean
se.AGE

### confidence interval for the population mean of AGE
t.out <- t.test(AGE)
t.out$conf.int

### bootstrap the mean
# The original script called bootstrap(AGE, mean) and limits.emp(boot.AGE)
# (original hint: help(jackknife)). Neither function exists in base R, so the
# same idea is written out with base functions: resample AGE with replacement
# n.boot times and record the mean of every resample.
# NOTE(review): 1000 resamples and the 2.5/5/95/97.5 percentiles are presumed
# to match the defaults of the original functions -- confirm if exact
# agreement with earlier output matters. Call set.seed() beforehand if
# reproducible resamples are needed.
n.boot <- 1000
boot.AGE <- vapply(
  seq_len(n.boot),
  function(i) mean(sample(AGE, replace = TRUE)),
  numeric(1)
)
summary(boot.AGE)
# empirical (percentile) bootstrap limits for the mean
quantile(boot.AGE, probs = c(0.025, 0.05, 0.95, 0.975))

# Question: Why are we using the mean with the AGE data?
# Question: How does the traditional t-confidence interval compare to the
#           bootstrap interval?

### Repeat the analysis for the SALARY data.
# Question: What is an appropriate statistic to estimate?
hist(SALARY, probability = TRUE)     # histogram on the density scale
plot(density(SALARY), type = "l")    # kernel density plot