# assume X = c(X_1,...,X_n) is an iid sample from f(x|theta)
# here we estimate the two key quantities used in the development of the CRLB
# expected slope of the log f(X|theta) = E[d/dtheta log f(X|theta)] = 0
# variance of the slope of the log f(X|theta) =
#   Var(d/dtheta log f(X|theta)) = E[(d/dtheta log f(X|theta))^2] =
#   -E[d^2/dtheta^2 log f(X|theta)] = I(theta)

############################################################################
# Example 1.
# X ~ N(mu, sigma^2), assume sigma^2 = 1
# f(x|mu) = (2*pi)^(-1/2) * exp( -(1/2)*(x-mu)^2 )
# L(mu)   = (2*pi)^(-n/2) * exp( -(1/2)*sum((x_i-mu)^2) )
# l(mu)   = -(n/2)*log(2*pi) - (1/2)*sum((x_i-mu)^2)
# d.l(mu) = sum(x_i-mu)
# d2.l(mu) = -n
# I(mu) = n  (for the whole sample; per observation I_1(mu) = 1)
# AV = 1/n   (asymptotic variance of the mle)

# simulate some data
mu.0 = 3
n = 100
x = rnorm(n, mean = mu.0, sd = 1)
x
x.sum = sum(x)
x2.sum = sum(x^2)

# plot the fitted model on the histogram
hist(x, freq = FALSE)
mu.mle = mean(x)
mu.mle
curve(dnorm(x, mean = mu.mle, sd = 1), add = TRUE)

############################################################################
# plot the functions of the parameter mu
mu = seq(2, 4, 0.01)
mu.len = length(mu)

X11()
par(mfrow = c(2, 2))

# likelihood
Lik = function(mu){
  (2*pi)^(-n/2)*exp( (-1/2)*(x2.sum - 2*mu*x.sum + n*mu^2) )
}
plot(mu, Lik(mu), type = 'l')

# log likelihood
log.Lik = function(mu){
  -(n/2)*log(2*pi) - (1/2)*(x2.sum - 2*mu*x.sum + n*mu^2)
}
plot(mu, log.Lik(mu), type = 'l')

# first derivative of the log likelihood (the score)
d.log.Lik = function(mu){
  x.sum - n*mu
}
plot(mu, d.log.Lik(mu), type = 'l')

# second derivative of the log likelihood (constant in mu)
d2.log.Lik = rep(-n, mu.len)
plot(mu, d2.log.Lik, type = 'l')

############################################################################
# examine the derivative of the log of the density

# simulate some different data, with a bigger n
mu.0 = 3
n = 1000
x = sort(rnorm(n, mean = mu.0, sd = 1))
mu.mle = mean(x)

# plug in the mle of mu to approximate I(mu)
d.log.density = function(x){
  x - mu.mle
}

# plots
X11()
par(mfrow = c(2, 2))
plot(x, d.log.density(x), type = 'l')
plot(x, d.log.density(x)^2, type = 'l')

# show the mean of the score is (approximately) zero
ev.d.log.density.est = mean( d.log.density(x) )
ev.d.log.density.est

# estimate the Fisher information for a single X in two ways, to see
# if there is any computational difference
I.mu.hat.est1 = mean( d.log.density(x)^2 )
I.mu.hat.est1
AV.est1 = 1/(n*I.mu.hat.est1)
AV.est1

I.mu.hat.est2 = var( d.log.density(x) )
I.mu.hat.est2
AV.est2 = 1/(n*I.mu.hat.est2)
AV.est2

# asymptotic variance of the mle AV
AV = 1/n
AV

# approximate relative efficiency of the two computed estimates
AV/AV.est1
AV/AV.est2

# CIs
# large-sample 95% CIs: how do they compare for different sample sizes?
# change n to see.
mu.ci   = c(mu.mle + qnorm(.025)*sqrt(AV),      mu.mle + qnorm(.975)*sqrt(AV))
mu.ci.1 = c(mu.mle + qnorm(.025)*sqrt(AV.est1), mu.mle + qnorm(.975)*sqrt(AV.est1))
mu.ci.2 = c(mu.mle + qnorm(.025)*sqrt(AV.est2), mu.mle + qnorm(.975)*sqrt(AV.est2))
mu.ci; mu.ci.1; mu.ci.2
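
############################################################################
# Optional replication check (a minimal sketch, not part of the original
# script): repeat the experiment many times to verify empirically that
# (1) the score d/dmu log f(X|mu), evaluated at the true mu.0, has mean
#     about 0 and variance about I_1(mu) = 1, and
# (2) the large-sample 95% CI based on AV = 1/n covers mu.0 about 95%
#     of the time.
# n.rep and the reuse of n = 100 here are arbitrary choices.
n.rep = 5000
n = 100
mu.0 = 3
cover = logical(n.rep)
score.mean = numeric(n.rep)
for (r in 1:n.rep) {
  x.r = rnorm(n, mean = mu.0, sd = 1)
  mu.hat = mean(x.r)
  # CI from the known asymptotic variance 1/n
  ci = mu.hat + qnorm(c(.025, .975))*sqrt(1/n)
  cover[r] = (ci[1] <= mu.0) & (mu.0 <= ci[2])
  # average score at the true mu.0: mean(x_i - mu.0) = xbar - mu.0
  score.mean[r] = mean(x.r - mu.0)
}
mean(cover)              # should be close to 0.95
mean(score.mean)         # should be close to 0 (expected slope of log f)
var(sqrt(n)*score.mean)  # should be close to I_1(mu) = 1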