### The dataset is "America's Best Small Companies"

###

### http://www.stat.sc.edu/webstat/version2.0/data/ceo.dat

###

### bootstrap

### First create the directory C:\Stat6601\ and then download the data file from the website above (ceo.dat), saving it there as ceo.txt.

### load the data

# Read the numbers in ceo.txt, the data file, and arrange them row by row
# into a two-column matrix. TRUE is spelled out because the shorthand T is an
# ordinary variable that can be reassigned.
CEO <- matrix(scan("C:\\Stat6601\\ceo.txt"), ncol = 2, byrow = TRUE)

AGE <- CEO[, 1] # first column of the matrix becomes the AGE vector

SALARY <- CEO[, 2] # second column of the matrix becomes the SALARY vector

### plot the AGE data

# Histogram of AGE on the density scale (bar areas sum to 1) rather than
# counts. TRUE is spelled out because the shorthand T can be reassigned.
hist(AGE, probability = TRUE)

# Kernel density estimate of AGE, drawn as a line.
plot(density(AGE), type = "l")

### summarize the AGE data

# Each statistic is assigned and echoed in one step: wrapping an assignment in
# parentheses makes its value visible, just like typing the name on its own.

(mean.AGE <- mean(AGE)) # sample mean

(sd.AGE <- sd(AGE)) # sample standard deviation

(n <- length(AGE)) # number of observations

(se.AGE <- sd.AGE / sqrt(n)) # standard error of the sample mean

### confidence interval for the population mean of AGE

# One-sample t-test on AGE; only its confidence interval for the mean is shown.
t.out <- t.test(AGE)

t.out[["conf.int"]]

### bootstrap the mean

# bootstrap() and limits.emp() are not part of base R (they appear to come
# from S-PLUS) and nothing in this script loads a package providing them, so
# the resampling is done directly with base functions instead.
# NOTE(review): boot.AGE is now a plain numeric vector of bootstrap means,
# not a bootstrap object -- check any later code that uses it.

n.boot <- 1000 # number of bootstrap resamples

# Each replicate draws length(AGE) values from AGE with replacement and
# records the mean of that resample. Results vary from run to run; call
# set.seed() beforehand if reproducible output is needed.
boot.AGE <- replicate(n.boot, mean(sample(AGE, replace = TRUE)))

summary(boot.AGE) # distribution of the bootstrap means

mean(boot.AGE) - mean(AGE) # bootstrap estimate of bias

sd(boot.AGE) # bootstrap estimate of the standard error of the mean

# Empirical percentile limits: the outer pair bounds a 95% interval,
# the inner pair a 90% interval.
quantile(boot.AGE, probs = c(0.025, 0.05, 0.95, 0.975))

# Question: Why are we using the mean with the AGE data?

# Question: How does the traditional t-confidence interval compare to the bootstrap interval?

### Repeat the analysis for the SALARY data.

# Question: What is an appropriate statistic to estimate?

# Histogram of SALARY on the density scale (bar areas sum to 1) rather than
# counts. TRUE is spelled out because the shorthand T can be reassigned.
hist(SALARY, probability = TRUE)

# Kernel density estimate of SALARY, drawn as a line.
plot(density(SALARY), type = "l")