### Nonparametric Bootstrap Example ### Heats of Sublimation of Platinum (kcal/mol), p.393 ######################################################################################## library(Hmisc) # The Data x <- c(136.3, 136.6, 135.8, 135.4, 134.7, 135.0, 134.1, 143.3, 147.8, 148.8, 134.8, 135.2, 134.9, 146.5, 141.2, 135.4, 134.8, 135.8, 135.0, 133.7, 134.4, 134.9, 134.8, 134.5, 134.3, 135.2) n <- length(x) # First, we will see if the data looks normal. # Plot the ecdf of the data and compare it to the normal cdf. # Standardize the values of x, get z, and then compare to the standard normal distribution. # Does the data look normal? For the data to be normal the ecdf should look like the # normal cdf. x.bar <- mean(x) x.sd <- sqrt(var(x)) z <- (x-x.bar)/x.sd plot(ecdf(z)) # Plot a histogram of the data. Do the data look normal? hist(x) # Make a stemplot. stem(x) # Make a boxplot. Are there some outliers? boxplot(x) # Make a desity plot. Does it look normal? plot(density(x), type="l") # this type = l, the letter l # Make a time plot. Note that over time there appear to be some outlies. plot(x, type="l", main="time plot") # Get some descriptive statistics on x. summary(x) ######################################################################################## # Since the data don't look normal we can't use the normal distribution and # the parametric bootstrap. We could try to find a better distibution or we could # use the nonparametric bootstrap. # Find the sampling distribution of the mean of x using the nonparametric bootstrap. B <- 1000 x.boot <- numeric(B) for(i in 1:B){ x.boot[i] <- mean(sample(x,n,replace=T)) } # A bootstrap estiamte of the population mean. mean(x.boot) # A bootstrap estimate of the standard error of the sampling distribution on the # sample mean. sd(x.boot) # A plot of the sampling distribution of the sample mean. What does is its shape? # What do you expect to see here according to the CLT. hist(x.boot, main="sampling distribtuion of the mean") # A nonparametric bootstrap 95% CI for the population mean. CI.mean <- quantile(x.boot, c(0.025, 0.975)) CI.mean ######################################################################################## # Find the sampling distribution of the median of x using the nonparametric bootstrap. B <- 1000 x.boot <- numeric(B) for(i in 1:B){ x.boot[i] <- median(sample(x,n,replace=T)) } # A bootstrap estiamte of the population median. mean(x.boot) # A bootstrap estimate of the standard error of the sampling distribution on the # sample median. sd(x.boot) # A plot of the sampling distribution of the sample median. What does is its shape? hist(x.boot, main="sampling distribution of the median") # A nonparametric bootstrap 95% CI for the population median. CI.median <- quantile(x.boot, c(0.025, 0.975)) CI.median ######################################################################################## # Find the sampling distribution of the 10% trimmed mean of x using the nonparametric bootstrap. B <- 1000 x.boot <- numeric(B) for(i in 1:B){ x.boot[i] <- mean(sample(x,n,replace=T), trim=0.10) } # A bootstrap estiamte of the population mean using the 10% trimmed mean. mean(x.boot) # A bootstrap estimate of the standard error of the sampling distribution on the # 10% trimmed mean. sd(x.boot) # A plot of the sampling distribution of the 10% trimmed mean. What does is its shape? hist(x.boot, main="sampling distribution of the 10% trimmed mean") # A nonparametric bootstrap 95% CI for the population mean. CI.trmean <- quantile(x.boot, c(0.025, 0.975)) CI.trmean ######################################################################################## CI.mean CI.median CI.trmean