/****************************************************************/ /* S A S S A M P L E L I B R A R Y */ /* */ /* NAME: REGDEMO */ /* TITLE: REGRESSION EXAMPLE */ /* PRODUCT: IML */ /* SYSTEM: ALL */ /* KEYS: MATRIX REGR SUGI6 */ /* PROCS: IML */ /* DATA: */ /* */ /* SUPPORT: RHD UPDATE: */ /* REF: */ /* MISC: */ /* */ /****************************************************************/ proc iml; /*---------REGRESSION ROUTINE------------*/ /* given x, and y, this fits y = x b + e */ /* by least squares. */ START REG; N=NROW(X); /* number of observations */ K=NCOL(X); /* number of variables */ XPX=X`*X; /* cross-products */ XPY=X`*Y; XPXI=INV(XPX); /* inverse crossproducts */ B=XPXI*XPY; /* parameter estimates */ YHAT=X*B; /* predicted values */ RESID=Y-YHAT; /* residuals */ SSE=RESID`*RESID; /* sum of squared errors */ DFE=N-K; /* degrees of freedom error */ MSE=SSE/DFE; /* mean squared error */ RMSE=SQRT(MSE); /* root mean squared error */ COVB=XPXI#MSE; /* covariance of estimates */ STDB=SQRT(VECDIAG(COVB)); /* standard errors */ T=B/STDB; /* ttest for estimates=0 */ PROBT=1-PROBF(T#T,1,DFE); /* significance probability */ PRINT NAME B STDB T PROBT; S=DIAG(1/STDB); CORRB=S*COVB*S; /* correlation of estimates */ PRINT ,"Covariance of Estimates", COVB[R=NAME C=NAME], "Correlation of Estimates",CORRB[R=NAME C=NAME]; IF NROW(TVAL)=0 THEN RETURN;/* is a t-value specified? */ PROJX=X*XPXI*X`; /* hat matrix */ VRESID=(I(N)-PROJX)*MSE; /* covariance of residuals */ VPRED =PROJX#MSE; /* covariance of predicted values */ H=VECDIAG(PROJX); /* hat leverage values */ LOWERM=YHAT-TVAL#SQRT(H*MSE); /*lower confidence limit for mean*/ UPPERM=YHAT+TVAL#SQRT(H*MSE); /* upper */ LOWER =YHAT-TVAL#SQRT(H*MSE+MSE);/*lower confidence limit for ind*/ UPPER =YHAT+TVAL#SQRT(H*MSE+MSE); /* upper */ PRINT ,,"Predicted Values, Residuals, and Limits" ,, Y YHAT RESID H LOWERM UPPERM LOWER UPPER; FINISH; /*---routine to test a linear combination of the estimates---*/ /* given L, this routine tests hypothesis that L b = 0. */ START TEST; DFN=NROW(L); LB=L*B; VLB=L*XPXI*L`; Q=LB`*INV(VLB)*LB /DFN; F=Q/MSE; PROB=1-PROBF(F,DFN,DFE); PRINT ,F DFN DFE PROB; FINISH; /*---run it on population of U.S. for Decades beginning 1790---*/ X= { 1 1 1, 1 2 4, 1 3 9, 1 4 16, 1 5 25, 1 6 36, 1 7 49, 1 8 64 }; Y= {3.929,5.308,7.239,9.638,12.866,17.069,23.191,31.443}; NAME={"Intercept", "Decade", "Decade**2" }; TVAL=2.57; /* FOR 5 DF AT .025 LEVEL TO GET 95% confidence interval */ RESET FW=7; RUN REG; DO; PRINT ,"TEST Coef for Linear"; L= {0 1 0 } ; RUN TEST; PRINT ,"TEST Coef for Linear,Quad"; L= {0 1 0,0 0 1} ; RUN TEST; PRINT ,"TEST Linear+Quad = 0"; L= {0 1 1 } ; RUN TEST; END;