### Regression, mean or median estimation?
#
# Compares ordinary least squares (conditional mean) with quantile regression
# (conditional median / other quantiles) on two small data sets:
#   1. systolic blood pressure (SBP) versus age, and
#   2. the `engel` food-expenditure data shipped with quantreg.
# Every figure is drawn on its own graphics device.

library(quantreg)    # rq() and the engel data set
library(simpleboot)  # lm.boot()

# ---- Part 1: regress systolic blood pressure (SBP) on age ------------------

Age <- c(39, 47, 45, 47, 65, 46, 67, 42, 67, 56, 64, 56, 59, 34, 42)
SBP <- c(144, 220, 138, 145, 162, 142, 170, 124, 158, 154, 162, 150, 140, 110,
         128)

# dev.new() opens the platform's default device, so the script is not tied
# to an X11 display.
dev.new()
plot(Age, SBP)

# Standard OLS: minimize the sum of squared errors (SSE).
Pred <- lm(SBP ~ Age)
print(Pred)  # explicit print so the fit is also shown under source()

# Draw the fitted line in ascending Age order so the path is traced once.
ord <- order(Age)
dev.new()
plot(Age, SBP)
lines(Age[ord], fitted(Pred)[ord])

# Residuals get their own device; otherwise they replace the fitted-line plot.
dev.new()
plot(Age, residuals(Pred))

# Quantile regression at the median: minimize the least absolute deviation
# (LAD). tau = 0.5 is rq()'s default, spelled out here for clarity.
Pred <- rq(SBP ~ Age, tau = 0.5)
print(Pred)
b <- coef(Pred)
pred <- b[1] + b[2] * Age

dev.new()
plot(Age, SBP)
lines(Age[ord], pred[ord])

# Is quantile regression more robust to outliers?
# Consider estimating the tau-th percentile in the scatterplot, e.g. the 10%
# and 90% quantiles (together with the median) by quantile regression.
dev.new()
plot(Age, SBP)
taus <- c(0.1, 0.5, 0.9)
xx <- seq(min(Age), max(Age), 0.5)
for (tau in taus) {
  Pred <- rq(SBP ~ Age, tau = tau)
  b <- coef(Pred)
  yy <- b[1] + b[2] * xx
  lines(xx, yy)
}

###########################################################################
# ---- Part 2: Engel food-expenditure data -----------------------------------

data(engel)  # a data set in the quantreg library

# Extract the two columns explicitly instead of attach()-ing the data frame,
# which would put generically named variables on the search path.
# NOTE(review): newer quantreg releases appear to name the columns
# `income` / `foodexp`, while older ones used `x` / `y`; accept either.
engel_names <- names(engel)
income <- if ("income" %in% engel_names) engel[["income"]] else engel[["x"]]
foodexp <- if ("foodexp" %in% engel_names) engel[["foodexp"]] else engel[["y"]]
stopifnot(is.numeric(income), is.numeric(foodexp))

dev.new()
plot(income, foodexp, xlab = "household income", ylab = "food expenditure")

# Remove an outlier (observation 138).
x1 <- income[-138]
y1 <- foodexp[-138]

# Using OLS.
Pred <- lm(y1 ~ x1)
print(Pred)

ord1 <- order(x1)
dev.new()
plot(x1, y1, xlab = "household income", ylab = "food expenditure")
lines(x1[ord1], fitted(Pred)[ord1])

# Bootstrap the linear model (1000 resamples).
lboot <- lm.boot(Pred, R = 1000)
print(summary(lboot))
dev.new()
plot(lboot)

# Using quantile regression at the 10%, 50% and 90% quantiles.
dev.new()
plot(x1, y1, xlab = "household income", ylab = "food expenditure")
taus <- c(0.1, 0.5, 0.9)
xx <- seq(min(x1), max(x1), 100)
for (tau in taus) {
  Pred <- rq(y1 ~ x1, tau = tau)
  print(Pred)
  b <- coef(Pred)
  yy <- b[1] + b[2] * xx
  lines(xx, yy)
}