### This is an R program that simulates the sampling distribution of ### a. The Median ### b. The Range ### c. The IQR ### d. The Mean ### e. The SD ### f. The 97.5 percentile ### for a sample of size n = 30 from the N(mu, sigmasq). ### And the nonparametric bootstrap is run for comparison. # try the calculations for a simulated sample mu = 25 sigma = 5 n = 30 x = rnorm(n, mu, sigma) x.orig = x # save original sample hist(x) x.med = median(x) # median x.med x.range = max(x) - min(x) # range x.range x.iqr = quantile(x, 0.75) - quantile(x, 0.25) # IQR x.iqr x.midr = (max(x) - min(x))/2 # midrange x.midr x.975 = quantile(x, 0.975) # 97.5 percentile x.975 x.mean = mean(x) x.mean x.sd = sd(x) x.sd # run the simulations many times and plot the simulated sampling # distributions of each statistic reps = 10000 x.med = numeric(reps) x.range = numeric(reps) x.iqr = numeric(reps) x.mean = numeric(reps) x.sd = numeric(reps) x.975 = numeric(reps) for(i in 1:reps){ x = rnorm(n, mu, sigma) x.med[i] = median(x) x.range[i] = max(x) - min(x) x.iqr[i] = quantile(x, 0.75) - quantile(x, 0.25) x.mean[i] = mean(x) x.sd[i] = sqrt(var(x)) x.975[i] = quantile(x, 0.975) } # The sampling distributions of the statistics br = (20:31)-0.5 par(mfrow=c(2,3)) hist(x.med, nclass=11, breaks=br, ylim=c(0,4500), main="median") hist(x.range, ylim=c(0,4500), main="range") hist(x.iqr, ylim=c(0,4500), main="iqr") hist(x.mean, nclass=11, breaks=br, ylim=c(0,4500), main="mean") hist(x.sd, ylim=c(0,4500), main="sd") hist(x.975, ylim=c(0,4500), main="97.5 percentile") # So which is better the median or mean? # Consider the properties of the sampling distributions. mean(x.med) mean(x.mean) sd(x.med) sd(x.mean) # probability intervals quantile(x.med,c(0.025,0.975)) quantile(x.mean,c(0.025,0.975)) # Note: the mean is a better estimator as it se is smaller and it # has a more concentrated sampling distribution. # Note: the above could be turned into a parametric bootstrap by # replacing the mu and sigma in the simulation with say the MMEs # or the MLEs. ###################################################################### # Run the nonparmetric bootstrap, resample the original data # using the simpleboot library. library(simpleboot) x = x.orig X11() par(mfrow=c(2,3)) median.boot = one.boot(x, median, R = reps) #print(median.boot) boot.ci(median.boot) hist(median.boot,main="median") my.range = function(x){ r = max(x) - min(x) return(r) } range.boot = one.boot(x, my.range, R=reps) boot.ci(range.boot) hist(range.boot,main="range") iqr = function(x){ i = quantile(x, 0.75) - quantile(x, 0.25) return(i) } iqr.boot = one.boot(x, iqr, R=reps) boot.ci(iqr.boot) hist(iqr.boot,main="iqr") mean.boot = one.boot(x, mean, R = reps) #print(mean.boot) boot.ci(mean.boot) hist(mean.boot,main="mean") sd.boot = one.boot(x, sd, R = reps) boot.ci(sd.boot) hist(sd.boot,main="sd") p975 = function(x){ x.975 = quantile(x, 0.975) return(x.975) } p975.boot = one.boot(x, p975, R = reps) boot.ci(p975.boot) hist(p975.boot,main="97.5 percentile")