***Introductory Macros;

/* Background: in an epidemiological study, we had a set of risk factor questionnaires (rfq) covering
	history of smoking, caffeine use, pesticide exposure, etc.
	Each questionnaire has its own data set, and each data set needed to be merged with
	a separate dataset containing Parkinson's Disease status (case versus control).
	A macro was used to do this more efficiently. */

/* datasetmerge: merge one telephone risk-factor questionnaire with
   rfm.Demo_Vars (the step below reads PDStatus and PD_Grp from the merged
   data) and split the result into the subsets used for comparison.

   prefix - name of the questionnaire dataset in library rphone (e.g. cf).

   Creates in WORK: <prefix>, <prefix>_phone_haspd, <prefix>_phone_nopd and
   <prefix>_phone_pdonly.
   Side effect: rphone.<prefix> is sorted by ID in place.
   rfm.Demo_Vars is not sorted here, so it must already be in ID order. */
%macro datasetmerge(prefix);

	/* The BY-merge below needs the questionnaire ordered by ID */
	proc sort data=rphone.&prefix;
		by ID;
	run;

	/* Keep only the IDs that have a questionnaire record */
	data &prefix;
		merge rfm.Demo_Vars
		      rphone.&prefix (in=_from_rfq);
		by ID;
		if _from_rfq;
	run;

	/* Route each record by PD status; the PD_Grp subset is filled
	   independently, so a record may also land in <prefix>_phone_pdonly */
	data &prefix._phone_haspd
	     &prefix._phone_nopd
	     &prefix._phone_pdonly;
		set &prefix;
		if PDStatus='Case' then output &prefix._phone_haspd;
		else if PDStatus='Control' then output &prefix._phone_nopd;
		if PD_Grp='1' then output &prefix._phone_pdonly;
	run;

%mend;

/* Run datasetmerge for prefix cf (caffeine questionnaire). This creates the
   WORK datasets cf, cf_phone_haspd, cf_phone_nopd and cf_phone_pdonly. */
%datasetmerge(cf);



***More advanced Macros;

*** Background:  This study was a validation study, and to compare responses between methods
	of data collection, the kappa statistic was utilized (a non-parametric statistic 
	measuring agreement).  The following macros prepare the data and then run proc freq 
	in order to calculate the kappa statistic;


/* KappaDataPrep: build RatingData, the paired-rating dataset that is later
   fed to PROC FREQ.

   dataset1   - dataset holding the first rating; only records whose webTime
                variable equals the webTime parameter are kept
   dataset2   - dataset holding the second rating
   index_key  - BY variable(s) used to pair the two datasets
   ratingvar1 - rating variable taken from dataset1
   ratingvar2 - rating variable taken from dataset2
   Qtype      - accepted but not referenced in this macro
   webTime    - value compared against the webTime variable of dataset1

   Creates WORK datasets Rating1, Rating2 and RatingData. RatingData keeps
   the key, both ratings, wgt (always 1) and webTime. */
%macro KappaDataPrep(dataset1=, dataset2=, index_key=, ratingvar1=,ratingvar2=,Qtype=,webTime=);

	/* Sorted working copies; the first one is filtered as it is written */
	proc sort data=&dataset1 out=Rating1 (where=(webTime=&webTime));
		by &index_key;
	run;

	proc sort data=&dataset2 out=Rating2;
		by &index_key;
	run;

	/* Put the two ratings side by side. Only keys present in both inputs
	   are kept, and a pair is dropped when either rating is one of the
	   listed missing codes (PROC FREQ would ignore such pairs anyway).
	   Every surviving pair gets a weight of 1. */
	data RatingData;
		merge Rating1 (in=fromFirst)
		      Rating2 (in=fromSecond);
		by &index_key;
		if fromFirst and fromSecond;
		if  &ratingvar1 not in (.,.S,.D,.R,.N)
		and &ratingvar2 not in (.,.S,.D,.R,.N);
		wgt = 1;
		keep &index_key &ratingvar1 &ratingvar2 wgt webTime;
	run;

%mend;


/* KappaDataPrepBS: baseline variant of the kappa data preparation. It builds
   RatingData the same way as KappaDataPrep but applies no webTime filter to
   dataset1.

   dataset1   - dataset holding the first rating
   dataset2   - dataset holding the second rating
   index_key  - BY variable(s) used to pair the two datasets
   ratingvar1 - rating variable taken from dataset1
   ratingvar2 - rating variable taken from dataset2
   Qtype      - accepted but not referenced in this macro

   Creates WORK datasets Rating1, Rating2 and RatingData. RatingData keeps
   the key, both ratings, wgt (always 1) and webTime. */
%macro KappaDataPrepBS(dataset1=, dataset2=, index_key=, ratingvar1=,ratingvar2=,Qtype=);

	/* Sorted working copies of both inputs */
	proc sort data=&dataset1 out=Rating1;
		by &index_key;
	run;

	proc sort data=&dataset2 out=Rating2;
		by &index_key;
	run;

	/* Put the two ratings side by side. Only keys present in both inputs
	   are kept, and a pair is dropped when either rating is one of the
	   listed missing codes (PROC FREQ would ignore such pairs anyway).
	   Every surviving pair gets a weight of 1. */
	data RatingData;
		merge Rating1 (in=fromFirst)
		      Rating2 (in=fromSecond);
		by &index_key;
		if fromFirst and fromSecond;
		if  &ratingvar1 not in (.,.S,.D,.R,.N)
		and &ratingvar2 not in (.,.S,.D,.R,.N);
		wgt = 1;
		keep &index_key &ratingvar1 &ratingvar2 wgt webTime;
	run;

%mend;





/****************************************************************************
 * KappaZeroWts
 *
 * Prepares paired-rating data for the kappa calculation macro. Every level
 * listed in levelvalues that a rater never used is appended to the dataset
 * as a dummy row (both ratings set to that level) with a weight of zero.
 *
 * Parameters
 *   dataset     - paired-rating dataset, extended in place with PROC APPEND.
 *                 It must contain both rating variables; the dummy rows
 *                 carry the variable wgt.
 *   levelvalues - blank-separated list of all possible numeric response
 *                 levels, for example: 0 1
 *   ratervar1   - numeric rating variable of the first rater
 *   ratervar2   - numeric rating variable of the second rater
 *
 * Side effects
 *   Sets the global macro variable levelcount and creates the WORK datasets
 *   LevelCount, AllValues, Rating1Values, Rating2Values,
 *   MissingRating1Values and MissingRating2Values.
 *
 * This header is a block comment on purpose: a statement-style comment in
 * open code is still scanned for macro triggers and quotation marks.
 ****************************************************************************/

%global levelcount;

%macro KappaZeroWts(dataset=,levelvalues=,ratervar1=,ratervar2=);

	/* Count the listed levels. COUNTW uses the same blank delimiter as the
	   SCAN call below, so the count always matches what SCAN can extract,
	   and an empty list yields 0 rather than 1. */
	data LevelCount;
		levelcount = countw("&levelvalues", ' ');
		/* SYMPUTX stores the number without leading blanks and without an
		   implicit numeric-to-character conversion; G = global table. */
		call symputx('levelcount', levelcount, 'G');
		keep levelcount;
	run;

	/* One row per possible response level, held in numeric variable i */
	data AllValues;
		do j = 1 to &levelcount;
			temp = scan("&levelvalues", j, ' ');
			i = input(temp, best12.);	/* character to numeric */
			output;
		end;
		drop j temp;
	run;

	/* Distinct levels actually used by each rater. Both lists are built
	   before anything is appended to the input dataset. */
	proc sort data=&dataset (keep=&ratervar1) out=Rating1Values nodupkey;
		by &ratervar1;
	run;

	proc sort data=&dataset (keep=&ratervar2) out=Rating2Values nodupkey;
		by &ratervar2;
	run;

	/* Levels a rater never used: rows of AllValues without a match.
	   Each becomes a dummy pair (same level for both raters) with
	   weight 0. */
	proc sql;
		create table MissingRating1Values as
		select lvl.i as &ratervar1,
		       lvl.i as &ratervar2,
		       0     as wgt
		from AllValues as lvl
		     left join Rating1Values as seen
		     on lvl.i = seen.&ratervar1
		where seen.&ratervar1 is missing;

		create table MissingRating2Values as
		select lvl.i as &ratervar2,
		       lvl.i as &ratervar1,
		       0     as wgt
		from AllValues as lvl
		     left join Rating2Values as seen
		     on lvl.i = seen.&ratervar2
		where seen.&ratervar2 is missing;
	quit;

	/* Add the 0-weighted dummy pairs to the 1-weighted real pairs */
	proc append base=&dataset data=MissingRating1Values;
	run;

	proc append base=&dataset data=MissingRating2Values;
	run;

%mend;


/* Driver: kappa agreement run for caffeine question A1, comparing the web
   rating caf1 with the telephone rating CFQA1 for the cf_phone_haspd subset.
   All output produced between the two ODS PDF statements goes to the PDF. */

/* theQx and HasPD are not referenced by any macro visible in this file;
   presumably they are read by KappaCalc2 -- TODO confirm. */
%let theQx= Caffeine;
%let HasPD= Yes;
ods pdf file='N:\ClinicalData\23ME\Analysis\Performance\Online\Weighted Kappa\caff_output_haspd.pdf';
/* QxA1 */
/* Step 1: pair web records with webTime=1 and telephone records by ID
   into WORK.RatingData. Qtype is passed but KappaDataPrep never reads it. */
%kappadataprep (dataset1=_23Me.Web_rfq, 
				dataset2=cf_phone_haspd, 
				index_key=ID, 
				ratingvar1=caf1,
				ratingvar2=CFQA1,
				Qtype=cf,
				webTime=1); 
/* Step 2: append zero-weight rows for any of the levels 0 and 1 that a
   rater never used. */
%KappaZeroWts(dataset=Ratingdata,levelvalues=0 1,ratervar1=caf1,ratervar2=CFQA1);


/* Step 3: KappaCalc2 is defined outside this file section.
   NOTE(review): QType=caf here versus Qtype=cf in step 1 -- confirm that the
   difference is intentional. */
%KappaCalc2(dataset=Ratingdata,QType=caf,theQx=A1,ratervar1=caf1,ratervar2=CFQA1,kappaweight=(wt=FC));
ods pdf close;