---
title: "sampling distribution examples"
author: "Prof. Eric A. Suess"
format: 
  html:
    self-contained: true
---

## Examples of Sampling Distributions

This is R code that simulates the sampling distribution of 
   
    a. The Median
    b. The Range
    c. The IQR
    d. The Mean
    e. The SD
    f. The 97.5 percentile

for a sample of size n = 30 from the $N(\mu, \sigma^2)$ distribution.  
The nonparametric bootstrap is then run for comparison.

Try the calculations for a single simulated sample:

```{r}
# Population parameters of the normal distribution being sampled.
mu <- 25
sigma <- 5

# Sample size.
n <- 30

# Draw one sample of n observations from N(mu, sigma).
x <- rnorm(n, mean = mu, sd = sigma)

# Keep a copy of this sample; the bootstrap section reuses it later.
x.orig <- x

# Density-scaled histogram of the sample with a kernel density estimate
# drawn on top in red.
hist(x, probability = TRUE)
lines(density(x), col = "red")
```


```{r}
# Descriptive statistics of the single sample x, each printed with a label.

# Median.
x.med <- median(x)
print(paste("The median is: ", x.med))

# Range: distance between the largest and the smallest observation.
x.range <- max(x) - min(x)
print(paste("The range is: ", x.range))

# Interquartile range: third quartile minus first quartile.
x.iqr <- quantile(x, 0.75) - quantile(x, 0.25)
print(paste("The IQR is: ", x.iqr))

# Midrange: the midpoint between the extremes, (max + min) / 2.
# Note the sum: (max - min) / 2 would be half the range, not the midrange.
x.midr <- (max(x) + min(x)) / 2
print(paste("The midrange is: ", x.midr))

# 97.5th percentile.
x.975 <- quantile(x, 0.975)
print(paste("The 97.5 percentile is: ", x.975))

# Sample mean.
x.mean <- mean(x)
print(paste("The sample mean is: ", x.mean))

# Sample standard deviation.
x.sd <- sd(x)
print(paste("The sample standard deviation is: ", x.sd))
```

Run the simulations many times and plot the simulated sampling distributions of each statistic.

```{r}
# Number of simulated samples.
B <- 10000

# For each of the B replications, draw a fresh sample of size n from
# N(mu, sigma) and record its median.
x.med <- replicate(B, median(rnorm(n, mean = mu, sd = sigma)))

# Simulated sampling distribution of the median: density-scaled histogram
# with a kernel density estimate overlaid in red.
hist(x.med, probability = TRUE)
lines(density(x.med), col = "red")
```

```{r}
# Number of simulated samples per statistic.
B <- 10000

# Each statistic gets its own B fresh samples of size n from N(mu, sigma),
# so the six result vectors are computed from different simulated samples.
x.med <- replicate(B, median(rnorm(n, mean = mu, sd = sigma)))
x.range <- replicate(B, diff(range(rnorm(n, mean = mu, sd = sigma))))
x.iqr <- replicate(
  B,
  diff(quantile(rnorm(n, mean = mu, sd = sigma), probs = c(0.25, 0.75)))
)
x.mean <- replicate(B, mean(rnorm(n, mean = mu, sd = sigma)))
x.sd <- replicate(B, sd(rnorm(n, mean = mu, sd = sigma)))
x.975 <- replicate(B, quantile(rnorm(n, mean = mu, sd = sigma), probs = 0.975))

# One density-scaled histogram per statistic, each with a kernel density
# estimate overlaid in red.

# Median
hist(x.med, probability = TRUE)
lines(density(x.med), col = "red")

# Range
hist(x.range, probability = TRUE)
lines(density(x.range), col = "red")

# Interquartile range
hist(x.iqr, probability = TRUE)
lines(density(x.iqr), col = "red")

# Mean
hist(x.mean, probability = TRUE)
lines(density(x.mean), col = "red")

# Standard deviation
hist(x.sd, probability = TRUE)
lines(density(x.sd), col = "red")

# 97.5th percentile
hist(x.975, probability = TRUE)
lines(density(x.975), col = "red")
```

So which is better, the **median** or the **mean**?
Consider the properties of the sampling distributions.

```{r}
# Average of the B simulated medians and of the B simulated means, i.e. the
# centre of each simulated sampling distribution. Compare both with mu.
mean(x.med)
mean(x.mean)
```

```{r}
# Variance of the simulated medians and of the simulated means; the smaller
# value belongs to the statistic that varies less from sample to sample.
var(x.med)
var(x.mean)
```

```{r}
# Standard deviation of the simulated medians and of the simulated means
# (the same comparison as the variances, on the scale of the data).
sd(x.med)
sd(x.mean)
```


## Probability Intervals

```{r}
# Central 95% intervals of the simulated sampling distributions: the 2.5th
# and 97.5th percentiles of the simulated medians and of the simulated means.
quantile(x.med, probs = c(0.025, 0.975))
quantile(x.mean, probs = c(0.025, 0.975))
```

## Nonparametric Bootstrap

Resample the original data using the simpleboot library.

```{r}
library(simpleboot)
library(boot)
```

```{r}
# Reset x to the saved copy of the original sample so the bootstrap below
# resamples the data drawn at the start of the document.
x <- x.orig
```

```{r}
# Lay the bootstrap histograms out on a 2 x 3 grid.
par(mfrow = c(2, 3))

# Bootstrap the median: B resamples of x, then print the confidence
# intervals and plot the bootstrap replicates.
median.boot <- one.boot(data = x, FUN = median, R = B)
boot.ci(median.boot)
hist(median.boot, main = "median")

# Range of a sample: largest value minus smallest value.
# Serves as the statistic handed to one.boot().
my.range <- function(x) {
  diff(range(x))
}

# Bootstrap the range: B resamples of x, then print the confidence
# intervals and plot the bootstrap replicates.
range.boot <- one.boot(data = x, FUN = my.range, R = B)
boot.ci(range.boot)
hist(range.boot, main = "range")

# Interquartile range of a sample: 75th percentile minus 25th percentile.
# The result keeps the "75%" name produced by quantile().
iqr <- function(x) {
  diff(quantile(x, probs = c(0.25, 0.75)))
}

# Bootstrap the interquartile range: B resamples of x, then print the
# confidence intervals and plot the bootstrap replicates.
iqr.boot <- one.boot(data = x, FUN = iqr, R = B)
boot.ci(iqr.boot)
hist(iqr.boot, main = "iqr")

# Bootstrap the sample mean: B resamples of x, then print the confidence
# intervals and plot the bootstrap replicates.
mean.boot <- one.boot(data = x, FUN = mean, R = B)
boot.ci(mean.boot)
hist(mean.boot, main = "mean")

# Same steps for the sample standard deviation.
sd.boot <- one.boot(data = x, FUN = sd, R = B)
boot.ci(sd.boot)
hist(sd.boot, main = "sd")

# 97.5th percentile of a sample, as returned by quantile() (named "97.5%").
# Serves as the statistic handed to one.boot().
p975 <- function(x) {
  quantile(x, probs = 0.975)
}

# Bootstrap the 97.5th percentile: B resamples of x, then print the
# confidence intervals and plot the bootstrap replicates.
p975.boot <- one.boot(data = x, FUN = p975, R = B)
boot.ci(p975.boot)
hist(p975.boot, main = "97.5 percentile")

```