1         Data analysis

 

 

1.1       Run descriptive statistics for each variable in a file (a macro using PROC MEANS as an example)

Sometimes we need to run basic descriptive statistics on a large number of variables for a quick review. Repeating the same commands for every variable is tedious and time-consuming. Here is a small macro that can speed this up and save you time.

 

 

options pageno=min formdlim='-' nodate nofmterr nonumber;

/*-------------------------------------------------------------------
  MEAN - run PROC MEANS (n, nmiss, min, max) once for every numeric
         variable of a data set.

  Parameters:
    output=      data set that receives the PROC CONTENTS listing of
                 &sourcefile (one row per variable)
    sourcefile=  data set to analyse; it is sorted in place by &sort
    sort=        BY variable(s); when left blank no sorting and no
                 BY-group processing is done

  Each variable name and its loop index are written to the log.
-------------------------------------------------------------------*/
%MACRO MEAN(output=, sourcefile=, sort=);
    /* keep the loop index out of the caller symbol table */
    %local i;

    /* PROC SORT without a BY statement is an error, so only sort when
       BY variables were actually supplied */
    %if %length(&sort) %then %do;
        PROC SORT data=&sourcefile;
            BY &sort;
        RUN;
    %end;

    PROC CONTENTS data=&sourcefile OUT=&output NOPRINT;
    RUN;

    /* Collect the variables to analyse into varname1, varname2, ...
       - type = 1 keeps numeric variables only, because the VAR
         statement of PROC MEANS rejects character variables (use
         type = 2 if the loop body is switched to PROC FREQ);
       - the NOT IN list excludes variables that should not be
         analysed; names are upper-cased first because the case stored
         by PROC CONTENTS follows the data set, not this list. */
    PROC SQL NOPRINT;
        SELECT DISTINCT NAME
            INTO :varname1 - :varname999
            FROM &output
            WHERE type = 1
              AND upcase(NAME) NOT IN ("SEX","FIELD13","VAR13","STATUS");
    QUIT;

    /* &sqlobs holds the number of names returned by the query above */
    %DO i = 1 %TO &sqlobs;
        %PUT &i &&varname&i;

        PROC MEANS data=&sourcefile n nmiss missing min max;
            VAR &&varname&i;
            %if %length(&sort) %then %do;
                BY &sort;
            %end;
            /* alternative analysis:
               PROC FREQ data=&sourcefile;
               tables &&varname&i*gender/norow nocol nopercent; */
        RUN;
    %end;
%mend MEAN;

 

 

1.2       Confidence interval for binomial distribution

(reference: http://download.uni-hd.de/ftp/pub/sas/makros/cibinom.sas)

 

/****************************************************************
*   INPUT PARAMETERS:  CL - Confidence level as a proportion    *
*                           (ie for a 95% confidence level      *
*                           CL should be 0.95)                  *
*                      N  - Total sample size                   *
*                      R  - Number with characteristic          *
*                                                               *
*   OUTPUT PARAMETERS: P  - Observed proportion (R/N)           *
*                      PL - Lower confidence limit              *
*                      PU - Upper confidence limit              *
*                                                               *
*   USAGE NOTES:       Missing values for the output parameters *
*                      are generated if (i) N is equal to zero, *
*                      or if (ii) R is less than zero or        *
*                      greater than N or if (iii) CL is not     *
*                      between 0.0 and 1.0.                     *
****************************************************************/

 

%MACRO CIBINOM(CL,N,R,P,PL,PU);
    /* Generates DATA step statements that compute the observed
       proportion and exact two-sided binomial confidence limits.
         CL, N, R   - confidence level (0-1), sample size, number with
                      the characteristic (variables or expressions)
         P, PL, PU  - variables that receive the proportion R/N and
                      the lower / upper limit
       Must be called inside a DATA step.  The branches below are
       tested in order, exactly one of them runs. */
    SELECT;
        /* invalid input: N = 0, R outside 0..N, or CL outside (0,1) */
        WHEN (((&N) EQ 0) OR (NOT(0 LE (&R) LE (&N))) OR (NOT(0 LT (&CL) LT 1))) DO;
            &P  = .;
            &PL = .;
            &PU = .;
        END;

        /* no events: lower limit is 0, upper limit in closed form */
        WHEN ((&R) EQ 0) DO;
            &P  = (&R)/(&N);
            &PL = 0;
            &PU = 1 - 10**(LOG10((1 - (&CL))/2)/(&N));
        END;

        /* all events: upper limit is 1, lower limit in closed form */
        WHEN ((&R) EQ (&N)) DO;
            &P  = (&R)/(&N);
            &PL = 10**(LOG10((1 - (&CL))/2)/(&N));
            &PU = 1;
        END;

        /* general case: limits from beta distribution quantiles */
        OTHERWISE DO;
            &P  = (&R)/(&N);
            &PL = 1 - BETAINV(((1 + (&CL))/2),((&N) + 1 - (&R)),(&R));
            &PU = BETAINV(((1 + (&CL))/2),((&R) + 1),((&N) - (&R)));
        END;
    END;
%MEND CIBINOM;

/* Example: 95% confidence limits for 5 events out of 50.
   The unnamed DATA step creates an automatically named data set,
   which the following PROC PRINT lists because it is the most
   recently created one. */
DATA;
    CL = .95;
    N  = 50;
    R  = 5;
    %CIBINOM(CL,N,R,P,PL,PU);
RUN;

PROC PRINT;
RUN;

 

 

1.3       Confidence interval for Poisson distribution

(Reference: http://download.uni-hd.de/ftp/pub/sas/makros/cipoiss.sas)

 

*****************************************************************
*   INPUT PARAMETERS:  CL - Confidence level as a proportion    *
*                           (ie for 95% confidence limits  CL   *
*                           should be 0.95)                     *
*                      X  - Observed number of events           *
*                                                               *
*   OUTPUT PARAMETERS: LL - Lower confidence limit              *
*                      LU - Upper confidence limit              *
*                                                               *
*   USAGE NOTES:       Missing values for the output parameters *
*                      are generated if (i) X is less than zero *
*                      or if (ii) CL is not between 0.0 and 1.0 *
*                                                               *
*****************************************************************;

 

%MACRO CIPOISS(CL,X,LL,LU);
    /* Generates DATA step statements that compute exact two-sided
       confidence limits for a Poisson count.
         CL      - confidence level as a proportion (0 < CL < 1)
         X       - observed number of events (variable or expression)
         LL, LU  - variables that receive the lower / upper limit
       Must be called inside a DATA step.  LL and LU are set to
       missing when X is negative (or missing) or CL is not strictly
       between 0 and 1. */
    IF (&X) LT 0 OR NOT(0 LT (&CL) LT 1) THEN DO;
        &LL = .;
        &LU = .;
    END;
    ELSE DO;
        /* A gamma shape of 0 is not valid for GAMINV, so the lower
           limit for zero events is set to 0 directly. */
        IF (&X) EQ 0 THEN &LL = 0;
        ELSE &LL = GAMINV((1 - (&CL))/2,(&X));

        /* The upper limit uses the same (1 - CL)/2 tail for every X.
           For X = 0 this equals -LOG((1 - CL)/2).  The previous code
           returned -LOG(1 - CL) there, which is a one-sided limit and
           did not match the two-sided limits used for X > 0. */
        &LU = GAMINV((1 + (&CL))/2,(&X) + 1);
    END;
%MEND CIPOISS;

 

/* Macro ends here.  Sample program follows. */

TITLE1 'EXAMPLE RUN OF MACRO CIPOISS';

OPTIONS NOCENTER NODATE LS = 72;

/* 95% confidence limits for an observed count of 50 events */
DATA;
    CL = .95;
    X  = 50;
    %CIPOISS(CL,X,LL,LU);
RUN;

/* The closing RUN makes the listing execute immediately; without it
   the step waits for a later step boundary in an interactive session. */
PROC PRINT;
RUN;

 

 

1.4       Proc tabulate function

 

options pagesize=max;

/*-------------------------------------------------------------------
  TABLEIT - PROC TABULATE summary of up to three analysis variables
            by one or more class variables.

  Parameters:
    data=      input data set
    var1=      first analysis variable
    var2=      second analysis variable (may be left blank)
    var3=      third analysis variable (may be left blank)
    class=     class variable(s) forming the rows of the table
    THESTAT=   statistic keyword(s) requested for every variable
    THELABEL=  text used at the start of the box label; when blank
               the text "Mean value" is used
    THEFMT=    format applied to every cell; when blank no format
               is specified

  The TITLE statement is fixed text and stays in effect after the
  macro has finished.
-------------------------------------------------------------------*/
%MACRO TABLEIT(data=,var1=,var2=, var3=,class=,THESTAT=, THELABEL=, THEFMT=);
    %local k v cols fmtspec boxlabel;

    /* THELABEL drives the box text; fall back to the former fixed text */
    %if %length(&THELABEL) %then %let boxlabel=&THELABEL;
    %else %let boxlabel=Mean value;

    /* only emit the format modifier when a format was supplied */
    %let fmtspec=;
    %if %length(&THEFMT) %then %let fmtspec=*F=&THEFMT;

    /* build one column expression per analysis variable that was
       supplied, so blank var2= / var3= no longer generate invalid
       TABLE syntax */
    %let cols=;
    %do k = 1 %to 3;
        %let v=&&var&k;
        %if %length(&v) %then %let cols=&cols &v*(&THESTAT)&fmtspec;
    %end;

    %if not %length(&cols) %then %do;
        %put ERROR: TABLEIT needs at least one analysis variable in var1, var2 or var3.;
        %return;
    %end;

    PROC TABULATE DATA=&data;
        CLASS &class;
        VAR &var1 &var2 &var3;
        TABLE &class, &cols
              / BOX="&boxlabel of &data data";
        TITLE "mean value for the FAS Data";
    RUN;
%MEND TABLEIT;

 

/* Example call: mean of three summary scores by treatment group */
%tableit(
    data=fas_5,
    var1=summary_score_DCIS_ADH,
    var2=summary_score_invasive,
    var3=summary_score_normal,
    class=treatment,
    thestat=mean,
    thelabel=Mean value,
    thefmt=4.2
);