### Here we fit the MLE for the normal # X_1, X_2, ...,X_n iid Normal(mu, sigma2), find the MLEs. # p.54 f(x|mu,sigma)= (1/(sigma*sqrt(2*pi)))*exp(-(x-mu)^2/2*sigma^2) # Consider a random sample from the normal distribution n = 30 mu = 70 sigma = 3 sigma2 = 3^2 x = rnorm(n, mu, sigma) x.orig = x x.mean = mean(x) x.sd = sd(x) x.sigma = sqrt(((n-1)/n)*var(x)) # make a histogram X11() hist(x,probability=T) x.min = min(hist(x)$breaks) x.max = max(hist(x)$breaks) y.max = max(hist(x)$density) + 0.05 # use the density estimator since this is a continuous random variable X11() plot(density(x, n=50, window="g"),type="l",xlab="x",ylab="density") # MLE fitted normal p.255 mu.hat.mle = mean(x) sigma.hat.mle = sqrt((1/length(x))*sum((x-mean(x))^2)) # plot the fitted model to the simulated data xx = seq(x.min,x.max,0.1) yy = dnorm(xx, mu.hat.mle, sigma.hat.mle) X11() plot(xx,yy, main="MLE fitted normal",type="l") # make plot of histogram with the fitted model X11() hist(x,probability=T,xlim=c(x.min,x.max),ylim=c(0,y.max),main="MLE fitted normal", xlab="x",ylab="density") par(new=T) plot(xx,yy, type="l",xlim=c(x.min,x.max),ylim=c(0,y.max),xlab="", ylab="") # make a plot of the density estimator with the fitted model. X11() plot(density(x, n=50, window="g"),type="l",xlim=c(x.min,x.max),ylim=c(0,y.max), main="MLE fitted normal",xlab="x",ylab="density") par(new=T) plot(xx,yy, type="l",xlim=c(x.min,x.max),ylim=c(0,y.max), xlab="x",ylab="density") # exact confidence interval for mu mu.ci.exact = c(x.mean - 1.96*x.sd/sqrt(n), x.mean + 1.96*x.sd/sqrt(n)) mu.ci.exact # large sample confidence interval for mu mu.ci.large = c(mu.hat.mle - 1.96*sigma.hat.mle/sqrt(n), mu.hat.mle + 1.96*sigma.hat.mle/sqrt(n)) mu.ci.large # parametric bootstrap the MLE B = 1000 mu.hat.mle.star = numeric(B) sigma.hat.mle.star = numeric(B) for(i in 1:B){ x = rnorm(n,mu.hat.mle,sigma.hat.mle) mu.hat.mle.star[i] = mean(x) sigma.hat.mle.star[i] = sqrt(((n-1)/n)*var(x)) } X11() plot(density(mu.hat.mle.star, n=50, window="g"),type="l",xlab="x",ylab="density") X11() plot(density(sigma.hat.mle.star, n=50, window="g"),type="l",xlab="x",ylab="density") # bootstap mu mu.mle.boot.m = mean(mu.hat.mle.star) mu.mle.boot.s = sd(mu.hat.mle.star) alpha = 0.05 alpha.b = c((alpha/2),(1-alpha/2)) # percentile method mu.mle.boot.ci = quantile(mu.hat.mle.star,alpha.b) mu.mle.boot.ci # bootstrap confidence interval - presented in the Rice book mu.hat.mle.delta = mu.hat.mle.star - mu.hat.mle delta.up = quantile(mu.hat.mle.delta,(1-alpha/2)) delta.low = quantile(mu.hat.mle.delta,(alpha/2)) boot.LB = mu.hat.mle - delta.up boot.LB boot.UB = mu.hat.mle - delta.low boot.UB # bootstap sigma sigma.mle.boot.m = mean(sigma.hat.mle.star) sigma.mle.boot.s = sd(sigma.hat.mle.star) # percentile method sigma.mle.boot.ci = quantile(sigma.hat.mle.star,c(0.025,0.975)) sigma.mle.boot.ci # bootstrap confidence interval - presented in the Rice book sigma.hat.mle.delta = sigma.hat.mle.star - sigma.hat.mle delta.up = quantile(sigma.hat.mle.delta,(1-alpha/2)) delta.low = quantile(sigma.hat.mle.delta,(alpha/2)) boot.LB = sigma.hat.mle - delta.up boot.LB boot.UB = sigma.hat.mle - delta.low boot.UB ######################################################################### ######################################################################### # nonparametric bootstrap # use the boot() function in R library(boot) ### mean mean.fn = function(x, d){ return(mean(x[d])) } system.time(mu.boot <- boot(x.orig, mean.fn, R=100000)) plot(mu.boot) mu.boot length(mu.boot$t) mu.boot$t0 summary(mu.boot$data) conf1 = c(0.90,0.95) ci = boot.ci(mu.boot, conf=conf1, type="all"); ci hist(mu.boot$t) # install the snow library, one library used for parallel processing in R library(snow) system.time(mu.boot <- boot(x.orig, mean.fn, R = 50000, parallel = c("snow"), ncpus = 2)) plot(mu.boot) mu.boot length(mu.boot$t) mu.boot$t0 summary(mu.boot$data) conf1 = c(0.90,0.95) ci = boot.ci(mu.boot, conf=conf1, type="all"); ci hist(mu.boot$t) ### time comparison with larger B, should be able to see the parallel processing in the ### Task Manager system.time(mu.boot <- boot(x.orig, mean.fn, R=1000000)) system.time(mu.boot <- boot(x.orig, mean.fn, R = 500000, parallel = c("snow"), ncpus = 2))