### The dataset is "America's Best Small Companies"
###
### http://www.stat.sc.edu/webstat/version2.0/data/ceo.dat
###
### bootstrap
### First create the directory C:\Stat6601\ and then save the data file from
### the website above there as ceo.txt.
### load the data
# scan() reads every numeric value in ceo.txt; matrix() then arranges the
# values row by row into a two-column matrix (one row per record).
CEO <- matrix(scan("C:\\Stat6601\\ceo.txt"), ncol = 2, byrow = TRUE)
AGE <- CEO[, 1]     # first column of the matrix becomes the AGE vector
SALARY <- CEO[, 2]  # second column of the matrix becomes the SALARY vector
### plot the AGE data
# Histogram drawn on the density scale (probability = TRUE) rather than as
# raw counts.
hist(AGE, probability = TRUE)
# Kernel density estimate of AGE, drawn as a line.
plot(density(AGE), type = "l")
### summarize the AGE data
# Each assignment is wrapped in parentheses so the stored value is also
# printed, in the order: mean, standard deviation, sample size, standard error.
(mean.AGE <- mean(AGE))          # sample mean
(sd.AGE <- sqrt(var(AGE)))       # sample standard deviation
(n <- length(AGE))               # number of observations
(se.AGE <- sd.AGE / sqrt(n))     # standard error of the mean
### confidence interval for the population mean of AGE
# One-sample t-test on AGE; only the confidence-interval component of the
# result is printed. No conf.level is passed, so t.test's default level is
# used (0.95 in R).
t.out <- t.test(AGE)
t.out$conf.int
### bootstrap the mean
# NOTE(review): bootstrap(data, statistic) and limits.emp() look like the
# S-PLUS resample API; base R does not provide functions with these names and
# signatures -- confirm the target environment before running.
boot.AGE <- bootstrap(AGE, mean)  # see also: help(jackknife)
summary(boot.AGE)                 # summary of the bootstrap replicates
limits.emp(boot.AGE)              # empirical limits from the bootstrap result
# Question: Why are we using the mean with the AGE data?
# Question: How does the traditional t-confidence interval compare to the
#           bootstrap interval?
### Repeat the analysis for the SALARY data.
# Question: What is an appropriate statistic to estimate?
# Histogram of SALARY on the density scale rather than as raw counts.
hist(SALARY, probability = TRUE)
# Kernel density estimate of SALARY, drawn as a line.
plot(density(SALARY), type = "l")