/****************************************************************/
/*          S A S   S A M P L E   L I B R A R Y                 */
/*                                                              */
/*    NAME: REGDEMO2                                            */
/*   TITLE: REGRESSION OF SUBSETS OF VARIABLES                  */
/* PRODUCT: IML                                                 */
/*  SYSTEM: ALL                                                 */
/*    KEYS: MATRIX  REGR    SUGI6                               */
/*   PROCS: IML                                                 */
/*    DATA:                                                     */
/*                                                              */
/* SUPPORT: LWB                         UPDATE:                 */
/*     REF:                                                     */
/*    MISC:                                                     */
/*                                                              */
/****************************************************************/

proc iml;

/*-------INITIALIZATION-------------------------------*
 | C,CSAVE  THE CROSSPRODUCTS MATRIX                  |
 | N        NUMBER OF OBSERVATIONS                    |
 | K        TOTAL NUMBER OF VARIABLES TO CONSIDER     |
 | L        NUMBER OF VARIABLES CURRENTLY IN MODEL    |
 | IN       A 0-1 VECTOR OF WHETHER VARIABLE IS IN    |
 | BPRINT   COLLECTS RESULTS (L MSE RSQ BETAS )       |
 *----------------------------------------------------*/

/* THE MODULES BELOW SHARE GLOBAL MATRICES.  SWP, BPR AND ZTRAIL  */
/* ARE INVOKED WITH RUN BUT ARE DEFINED ELSEWHERE.                */
/* NOTE(REVIEW): L, MSE AND DFE ARE PRESUMABLY REFRESHED BY SWP,  */
/* SINCE BACKWARD USES THEM WITHOUT ASSIGNING THEM - CONFIRM.     */

/*-----INITIAL: BUILD THE SAVED CROSSPRODUCTS MATRIX-----------*/
/* EXPECTS X, Y AND VARNAMES TO EXIST BEFORE THE MODULE IS RUN. */
/* X AND Y ARE FREED ONCE THE CROSSPRODUCTS HAVE BEEN FORMED.   */
START INITIAL;
   N=NROW(X); K=NCOL(X); K1=K+1; IK=1:K;
   BNAMES={NPARM MSE RSQUARE} ||VARNAMES;

   /*---CORRECT BY MEAN, ADJUST OUT INTERCEPT PARAMETER---*/
   Y=Y-Y[+,]/N;                      /* CORRECT Y BY MEAN */
   X=X-REPEAT(X[+,]/N,N,1);          /* CORRECT X BY MEAN */
   XPY=X`*Y;                         /* CROSSPRODUCTS */
   YPY=Y`*Y;
   XPX=X`*X;
   FREE X Y;                         /* NO LONGER NEED THE DATA*/
   CSAVE=(XPX || XPY) //
         (XPY`|| YPY);               /* SAVED COPY OF CROSSPRODUCTS*/
FINISH;

/*-----FORWARD METHOD------------------------------------------*/
/* REPEATEDLY ENTERS THE MOST SIGNIFICANT VARIABLE NOT YET IN   */
/* THE MODEL WHILE ITS TWO-SIDED T-TEST PROBABILITY IS < .15.   */
START FORWARD;
   PRINT / "FORWARD SELECTION METHOD";
   FREE BPRINT;
   C=CSAVE; IN=REPEAT(0,K,1); L=0;   /* NO VARIABLES ARE IN */
   DFE=N-1; MSE=YPY/DFE;
   SPROB=0;

   DO WHILE(SPROB<.15 & L<K);
      INDX=LOC(^IN);            /* WHERE ARE THE VARIABLES NOT IN? */
      CD=VECDIAG(C)[INDX,];     /* XPX DIAGONALS */
      CB=C[INDX,K1];            /* ADJUSTED XPY */
      TSQR=CB#CB/(CD#MSE);      /* SQUARES OF T TESTS */
      IMAX=TSQR[<:>,];          /* LOCATION OF MAXIMUM IN INDX */
      SPROB=(1-PROBT(SQRT(TSQR[IMAX,]),DFE))*2;
      IF SPROB<.15 THEN DO;     /* IF T-TEST SIGNIFICANT */
         II=INDX[,IMAX];        /* PICK MOST SIGNIFICANT */
         RUN SWP;               /* ROUTINE TO SWEEP */
         RUN BPR;               /* ROUTINE TO COLLECT RESULTS */
      END;
   END;
   PRINT BPRINT[COLNAME=BNAMES] ;
FINISH;

/*-----BACKWARD METHOD----------------------------------------*/
/* STARTS WITH EVERY VARIABLE SWEPT IN, THEN REPEATEDLY REMOVES */
/* THE LEAST SIGNIFICANT ONE WHILE ITS PROBABILITY IS > .15.    */
START BACKWARD;
   PRINT / "BACKWARD ELIMINATION ";
   FREE BPRINT;
   C=CSAVE; IN=REPEAT(0,K,1);
   II=1:K; RUN SWP; RUN BPR;         /* START WITH ALL VARIABLES IN*/
   SPROB=1;

   DO WHILE(SPROB>.15 & L>0);
      INDX=LOC(IN);             /* WHERE ARE THE VARIABLES IN? */
      CD=VECDIAG(C)[INDX,];     /* XPX DIAGONALS */
      CB=C[INDX,K1];            /* BVALUES */
      TSQR=CB#CB/(CD#MSE);      /* SQUARES OF T TESTS */
      IMIN=TSQR[>:<,];          /* LOCATION OF MINIMUM IN INDX */
      SPROB=(1-PROBT(SQRT(TSQR[IMIN,]),DFE))*2;
      IF SPROB>.15 THEN DO;     /* IF T-TEST NONSIGNIFICANT */
         II=INDX[,IMIN];        /* PICK LEAST SIGNIFICANT */
         RUN SWP;               /* ROUTINE TO SWEEP IN VARIABLE*/
         RUN BPR;               /* ROUTINE TO COLLECT RESULTS */
      END;
   END;
   PRINT BPRINT[COLNAME=BNAMES] ;
FINISH;

/*-----STEPWISE METHOD-----------------------------------------*/
/* FORWARD ENTRY AS IN FORWARD, BUT AFTER EACH ENTRY BACKSTEP   */
/* IS RUN TO CHECK WHETHER ANY TERM SHOULD BE REMOVED.          */
START STEPWISE;
   PRINT /"STEPWISE METHOD";
   FREE BPRINT;
   C=CSAVE; IN=REPEAT(0,K,1); L=0;
   DFE=N-1; MSE=YPY/DFE;
   SPROB=0;

   DO WHILE(SPROB<.15 & L<K);
      INDX=LOC(^IN);            /* WHERE ARE THE VARIABLES NOT IN? */
      NINDX=LOC(IN);            /* WHERE ARE THE VARIABLES IN? */
      CD=VECDIAG(C)[INDX,];     /* XPX DIAGONALS */
      CB=C[INDX,K1];            /* ADJUSTED XPY */
      TSQR=CB#CB/(CD#MSE);      /* SQUARES OF T TESTS */
      IMAX=TSQR[<:>,];          /* LOCATION OF MAXIMUM IN INDX */
      SPROB=(1-PROBT(SQRT(TSQR[IMAX,]),DFE))*2;
      IF SPROB<.15 THEN DO;     /* IF T-TEST SIGNIFICANT */
         II=INDX[,IMAX];        /* FIND INDEX INTO C */
         RUN SWP;               /* ROUTINE TO SWEEP */
         RUN BACKSTEP;          /* CHECK IF REMOVE ANY TERMS */
         RUN BPR;               /* ROUTINE TO COLLECT RESULTS */
      END;
   END;
   PRINT BPRINT[COLNAME=BNAMES] ;
FINISH;

/*----ROUTINE TO BACKWARDS-ELIMINATE FOR STEPWISE--*/
/* USES NINDX AS SET BY STEPWISE; RETURNS AT ONCE WHEN NINDX   */
/* IS EMPTY (NO VARIABLES WERE IN THE MODEL).                  */
START BACKSTEP;
   IF NROW(NINDX)=0 THEN RETURN;
   BPROB=1;
   DO WHILE(BPROB>.15 & L<K);
      CD=VECDIAG(C)[NINDX,];    /* XPX DIAGONALS */
      CB=C[NINDX,K1];           /* BVALUES */
      TSQR=CB#CB/(CD#MSE);      /* SQUARES OF T TESTS */
      IMIN=TSQR[>:<,];          /* LOCATION OF MINIMUM IN NINDX*/
      BPROB=(1-PROBT(SQRT(TSQR[IMIN,]),DFE))*2;
      IF BPROB>.15 THEN DO;
         II=NINDX[,IMIN];
         RUN SWP;
         RUN BPR;
      END;
   END;
FINISH;

/*-----SEARCH ALL POSSIBLE MODELS----------------------------*/
START ALL;
   /*---USE METHOD OF SCHATZOFF ET AL. FOR SEARCH TECHNIQUE--*/
   BETAK=REPEAT(0,K,K);  /* RECORD ESTIMATES FOR BEST L-PARAM MODEL*/
   MSEK=REPEAT(1E50,K,1);/* RECORD BEST MSE PER # PARMS */
   RSQK=REPEAT(0,K,1);   /* RECORD BEST RSQUARE */
   INK=REPEAT(0,K,K);    /* RECORD BEST SET PER # PARMS */
   LIMIT=2##K-1;         /* NUMBER OF MODELS TO EXAMINE */

   C=CSAVE; IN=REPEAT(0,K,1);/* START OUT WITH NO VARIABLES IN MODEL*/

   DO KK=1 TO LIMIT;
      RUN ZTRAIL;           /* FIND WHICH ONE TO SWEEP */
      RUN SWP;              /* SWEEP IT IN */
      BB=BB//(L||MSE||RSQ||(C[IK,K1]#IN)`);
      IF MSE