# Simulation of Problem 22, Ross Probability Models

p <- 1		# number of equally likely values (0,1,...,p)
m <- p+1	# number of possible outcomes
k <- 5		# number of values in a run

B <- 1000

N <- numeric(B)	# vector of simulated values
X <- matrix(NA,nrow=B,ncol=k)
		# matrix of runs

for(i in 1:B){
x <- sample(0:p,k,replace=TRUE)
#y <- x	# used to check program, N = length(y) 
N[i] <- k 	# count length of sequence
repeat{
	u <- unique(x)		# check if all the same
	if(length(u)==1){
		X[i,] <- x	# store final run
		break		# if length is 1, all the same, stop
	}
	new <- sample(0:p,1,replace=TRUE)	# new last value
	x <- c(x[2:k],new)	# move window
	#y <- c(y,new)
	N[i] <- N[i] + 1	# incrment count
}
}

N 	# length of sequence in ending with a run of k
x	# the run
#y	# sequence
#length(y)

# Plot the distribution of N.

hist(N)
N.mean <- mean(N)
N.mean
N.var <- var(N)
N.var
N.sd <- sqrt(N.var)
N.sd

# For p = 1, m = 2, k = 5, test if N of 50 is consistent with 
# E[N] = (m^k - 1)/(m - 1)

value <- 50

N.E <- (m^k - 1)/(m - 1)
N.SD <- N.E	# according to the book

z = (value - N.E)/N.SD
z

p_value <- 1-pnorm(abs(z))
p_value

z.obs <- (value - N.mean)/N.sd
z.obs

p_value.obs <- 1-pnorm(abs(z.obs))
p_value.obs

# What proportion of the simulated values are greater than 50?

p_value.boot <- length(N[N>value])/length(N)
p_value.boot