sampling distribution examples

Author

Prof. Eric A. Suess

Examples of Sampling Distributions

This is R code that simulates the sampling distribution of

a. The Median
b. The Range
c. The IQR
d. The Mean
e. The SD
f. The 97.5 percentile

for a sample of size n = 30 from the N(mu, sigmasq).
And the nonparametric bootstrap is run for comparison.

Try the calculations for a simulated sample

mu <- 25
sigma <- 5

n <- 30

x <- rnorm(n, mu, sigma)

x.orig <- x # save original sample

hist(x, probability = TRUE)
points(density(x), type = "l", col = "red")

x.med <- median(x)      # median
print(paste("The median is: ", x.med))
[1] "The median is:  23.8555681712853"
x.range <- max(x) - min(x)  # range
print(paste("The range is: ", x.range))
[1] "The range is:  20.8608807943157"
x.iqr <- quantile(x, 0.75) - quantile(x, 0.25)      # IQR
print(paste("The IQR is: ", x.iqr))
[1] "The IQR is:  6.0410712393836"
x.midr <- (max(x) - min(x))/2   # midrange
print(paste("The midrange is: ", x.midr))
[1] "The midrange is:  10.4304403971579"
x.975 <- quantile(x, 0.975) # 97.5 percentile 
print(paste("The 97.5 percentile is: ", x.975))
[1] "The 97.5 percentile is:  32.8742600678588"
x.mean <- mean(x)
print(paste("The sample mean is: ", x.mean))
[1] "The sample mean is:  24.1990849343467"
x.sd <- sd(x)
print(paste("The sample standard deviation is: ", x.sd))
[1] "The sample standard deviation is:  4.62967270036225"

Run the simulations many times and plot the simulated sampling distributions of each statistic.

B <- 10000

x.med <- replicate(B, median(rnorm(n, mu, sigma)))

hist(x.med, probability = TRUE)
points(density(x.med), type = "l", col = "red")

B <- 10000

x.med <- replicate(B, median(rnorm(n, mu, sigma)))
x.range <- replicate(B, diff(range(rnorm(n, mu, sigma))))
x.iqr <- replicate(B, diff(quantile(rnorm(n, mu, sigma), c(0.25,0.75)) ))
x.mean <- replicate(B, mean(rnorm(n, mu, sigma)))
x.sd <- replicate(B, sd(rnorm(n, mu, sigma)))
x.975 <- replicate(B, quantile(rnorm(n, mu, sigma), 0.975))

hist(x.med, probability = TRUE)
points(density(x.med), type = "l", col = "red")

hist(x.range, probability = TRUE)
points(density(x.range), type = "l", col = "red")

hist(x.iqr, probability = TRUE)
points(density(x.iqr), type = "l", col = "red")

hist(x.mean, probability = TRUE)
points(density(x.mean), type = "l", col = "red")

hist(x.sd, probability = TRUE)
points(density(x.sd), type = "l", col = "red")

hist(x.975, probability = TRUE)
points(density(x.975), type = "l", col = "red")

So which is better the median or mean? Consider the properties of the sampling distributions.

mean(x.med)
[1] 24.99379
mean(x.mean)
[1] 24.99078
var(x.med)
[1] 1.266713
var(x.mean)
[1] 0.8484613
sd(x.med)
[1] 1.125484
sd(x.mean)
[1] 0.9211196

probability intervals

quantile(x.med,c(0.025,0.975))
    2.5%    97.5% 
22.78170 27.20929 
quantile(x.mean,c(0.025,0.975))
    2.5%    97.5% 
23.16361 26.82573 

Nonparmetric Bootstrap

Resample the original data using the simpleboot library.

library(simpleboot)
Simple Bootstrap Routines (1.1-7)
library(boot)
x <- x.orig
par(mfrow=c(2,3))

median.boot <- one.boot(x, median,  R = B)
boot.ci(median.boot)
Warning in boot.ci(median.boot): bootstrap variances needed for studentized
intervals
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 10000 bootstrap replicates

CALL : 
boot.ci(boot.out = median.boot)

Intervals : 
Level      Normal              Basic         
95%   (22.24, 26.03 )   (22.35, 25.93 )  

Level     Percentile            BCa          
95%   (21.78, 25.36 )   (21.78, 25.36 )  
Calculations and Intervals on Original Scale
hist(median.boot, main = "median")

my.range = function(x){
        r = max(x) - min(x)
        return(r)
}

range.boot = one.boot(x, my.range, R = B)
boot.ci(range.boot)
Warning in boot.ci(range.boot): bootstrap variances needed for studentized
intervals
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 10000 bootstrap replicates

CALL : 
boot.ci(boot.out = range.boot)

Intervals : 
Level      Normal              Basic         
95%   (16.64, 31.23 )   (20.86, 30.10 )  

Level     Percentile            BCa          
95%   (11.62, 20.86 )   (12.15, 20.86 )  
Calculations and Intervals on Original Scale
hist(range.boot, main="range")

iqr = function(x){
        i = quantile(x, 0.75) - quantile(x, 0.25)
        return(i)
}

iqr.boot = one.boot(x, iqr, R = B)
boot.ci(iqr.boot)
Warning in boot.ci(iqr.boot): bootstrap variances needed for studentized
intervals
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 10000 bootstrap replicates

CALL : 
boot.ci(boot.out = iqr.boot)

Intervals : 
Level      Normal              Basic         
95%   ( 3.395,  9.100 )   ( 3.324,  8.874 )  

Level     Percentile            BCa          
95%   ( 3.208,  8.758 )   ( 3.501,  9.032 )  
Calculations and Intervals on Original Scale
hist(iqr.boot, main = "iqr")

mean.boot = one.boot(x, mean, R = B)
boot.ci(mean.boot)
Warning in boot.ci(mean.boot): bootstrap variances needed for studentized
intervals
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 10000 bootstrap replicates

CALL : 
boot.ci(boot.out = mean.boot)

Intervals : 
Level      Normal              Basic         
95%   (22.56, 25.84 )   (22.50, 25.76 )  

Level     Percentile            BCa          
95%   (22.64, 25.90 )   (22.65, 25.91 )  
Calculations and Intervals on Original Scale
hist(mean.boot, main="mean")

sd.boot = one.boot(x, sd, R = B)
boot.ci(sd.boot)
Warning in boot.ci(sd.boot): bootstrap variances needed for studentized
intervals
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 10000 bootstrap replicates

CALL : 
boot.ci(boot.out = sd.boot)

Intervals : 
Level      Normal              Basic         
95%   ( 3.320,  6.192 )   ( 3.327,  6.063 )  

Level     Percentile            BCa          
95%   ( 3.196,  5.932 )   ( 3.408,  6.276 )  
Calculations and Intervals on Original Scale
hist(sd.boot, main="sd")

p975 = function(x){
    x.975 = quantile(x, 0.975)
    return(x.975)
}

p975.boot = one.boot(x, p975, R = B)
boot.ci(p975.boot)
Warning in boot.ci(p975.boot): bootstrap variances needed for studentized
intervals
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 10000 bootstrap replicates

CALL : 
boot.ci(boot.out = p975.boot)

Intervals : 
Level      Normal              Basic         
95%   (26.23, 38.77 )   (27.44, 36.69 )  

Level     Percentile            BCa          
95%   (29.06, 38.31 )   (28.93, 38.31 )  
Calculations and Intervals on Original Scale
hist(p975.boot, main="97.5 percentile")