<- 25
mu <- 5
sigma
<- 30
n
<- rnorm(n, mu, sigma)
x
<- x # save original sample
x.orig
hist(x, probability = TRUE)
points(density(x), type = "l", col = "red")
sampling distribution examples
Examples of Sampling Distributions
This is R code that simulates the sampling distribution of
a. The Median
b. The Range
c. The IQR
d. The Mean
e. The SD
f. The 97.5 percentile
for a sample of size n = 30 from the N(mu, sigmasq).
And the nonparametric bootstrap is run for comparison.
Try the calculations for a simulated sample
<- median(x) # median
x.med print(paste("The median is: ", x.med))
[1] "The median is: 23.8555681712853"
<- max(x) - min(x) # range
x.range print(paste("The range is: ", x.range))
[1] "The range is: 20.8608807943157"
<- quantile(x, 0.75) - quantile(x, 0.25) # IQR
x.iqr print(paste("The IQR is: ", x.iqr))
[1] "The IQR is: 6.0410712393836"
<- (max(x) - min(x))/2 # midrange
x.midr print(paste("The midrange is: ", x.midr))
[1] "The midrange is: 10.4304403971579"
.975 <- quantile(x, 0.975) # 97.5 percentile
xprint(paste("The 97.5 percentile is: ", x.975))
[1] "The 97.5 percentile is: 32.8742600678588"
<- mean(x)
x.mean print(paste("The sample mean is: ", x.mean))
[1] "The sample mean is: 24.1990849343467"
<- sd(x)
x.sd print(paste("The sample standard deviation is: ", x.sd))
[1] "The sample standard deviation is: 4.62967270036225"
Run the simulations many times and plot the simulated sampling distributions of each statistic.
<- 10000
B
<- replicate(B, median(rnorm(n, mu, sigma)))
x.med
hist(x.med, probability = TRUE)
points(density(x.med), type = "l", col = "red")
<- 10000
B
<- replicate(B, median(rnorm(n, mu, sigma)))
x.med <- replicate(B, diff(range(rnorm(n, mu, sigma))))
x.range <- replicate(B, diff(quantile(rnorm(n, mu, sigma), c(0.25,0.75)) ))
x.iqr <- replicate(B, mean(rnorm(n, mu, sigma)))
x.mean <- replicate(B, sd(rnorm(n, mu, sigma)))
x.sd .975 <- replicate(B, quantile(rnorm(n, mu, sigma), 0.975))
x
hist(x.med, probability = TRUE)
points(density(x.med), type = "l", col = "red")
hist(x.range, probability = TRUE)
points(density(x.range), type = "l", col = "red")
hist(x.iqr, probability = TRUE)
points(density(x.iqr), type = "l", col = "red")
hist(x.mean, probability = TRUE)
points(density(x.mean), type = "l", col = "red")
hist(x.sd, probability = TRUE)
points(density(x.sd), type = "l", col = "red")
hist(x.975, probability = TRUE)
points(density(x.975), type = "l", col = "red")
So which is better the median or mean? Consider the properties of the sampling distributions.
mean(x.med)
[1] 24.99379
mean(x.mean)
[1] 24.99078
var(x.med)
[1] 1.266713
var(x.mean)
[1] 0.8484613
sd(x.med)
[1] 1.125484
sd(x.mean)
[1] 0.9211196
probability intervals
quantile(x.med,c(0.025,0.975))
2.5% 97.5%
22.78170 27.20929
quantile(x.mean,c(0.025,0.975))
2.5% 97.5%
23.16361 26.82573
Nonparmetric Bootstrap
Resample the original data using the simpleboot library.
library(simpleboot)
Simple Bootstrap Routines (1.1-7)
library(boot)
<- x.orig x
par(mfrow=c(2,3))
<- one.boot(x, median, R = B)
median.boot boot.ci(median.boot)
Warning in boot.ci(median.boot): bootstrap variances needed for studentized
intervals
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 10000 bootstrap replicates
CALL :
boot.ci(boot.out = median.boot)
Intervals :
Level Normal Basic
95% (22.24, 26.03 ) (22.35, 25.93 )
Level Percentile BCa
95% (21.78, 25.36 ) (21.78, 25.36 )
Calculations and Intervals on Original Scale
hist(median.boot, main = "median")
= function(x){
my.range = max(x) - min(x)
r return(r)
}
= one.boot(x, my.range, R = B)
range.boot boot.ci(range.boot)
Warning in boot.ci(range.boot): bootstrap variances needed for studentized
intervals
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 10000 bootstrap replicates
CALL :
boot.ci(boot.out = range.boot)
Intervals :
Level Normal Basic
95% (16.64, 31.23 ) (20.86, 30.10 )
Level Percentile BCa
95% (11.62, 20.86 ) (12.15, 20.86 )
Calculations and Intervals on Original Scale
hist(range.boot, main="range")
= function(x){
iqr = quantile(x, 0.75) - quantile(x, 0.25)
i return(i)
}
= one.boot(x, iqr, R = B)
iqr.boot boot.ci(iqr.boot)
Warning in boot.ci(iqr.boot): bootstrap variances needed for studentized
intervals
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 10000 bootstrap replicates
CALL :
boot.ci(boot.out = iqr.boot)
Intervals :
Level Normal Basic
95% ( 3.395, 9.100 ) ( 3.324, 8.874 )
Level Percentile BCa
95% ( 3.208, 8.758 ) ( 3.501, 9.032 )
Calculations and Intervals on Original Scale
hist(iqr.boot, main = "iqr")
= one.boot(x, mean, R = B)
mean.boot boot.ci(mean.boot)
Warning in boot.ci(mean.boot): bootstrap variances needed for studentized
intervals
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 10000 bootstrap replicates
CALL :
boot.ci(boot.out = mean.boot)
Intervals :
Level Normal Basic
95% (22.56, 25.84 ) (22.50, 25.76 )
Level Percentile BCa
95% (22.64, 25.90 ) (22.65, 25.91 )
Calculations and Intervals on Original Scale
hist(mean.boot, main="mean")
= one.boot(x, sd, R = B)
sd.boot boot.ci(sd.boot)
Warning in boot.ci(sd.boot): bootstrap variances needed for studentized
intervals
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 10000 bootstrap replicates
CALL :
boot.ci(boot.out = sd.boot)
Intervals :
Level Normal Basic
95% ( 3.320, 6.192 ) ( 3.327, 6.063 )
Level Percentile BCa
95% ( 3.196, 5.932 ) ( 3.408, 6.276 )
Calculations and Intervals on Original Scale
hist(sd.boot, main="sd")
= function(x){
p975 .975 = quantile(x, 0.975)
xreturn(x.975)
}
= one.boot(x, p975, R = B)
p975.boot boot.ci(p975.boot)
Warning in boot.ci(p975.boot): bootstrap variances needed for studentized
intervals
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 10000 bootstrap replicates
CALL :
boot.ci(boot.out = p975.boot)
Intervals :
Level Normal Basic
95% (26.23, 38.77 ) (27.44, 36.69 )
Level Percentile BCa
95% (29.06, 38.31 ) (28.93, 38.31 )
Calculations and Intervals on Original Scale
hist(p975.boot, main="97.5 percentile")