/******************************************************************************
 Stratified covariate balancing (SCB) of the citalopram data.

 Flow of the program:
   1. Prepare the analysis data set (recode covariates, build strata).
   2. Report strata by treatment and outcome.
   3. Compute the unbalanced odds ratio of treatment vs. each covariate.
   4. Compute per-stratum weights for the controls (cases / controls).
   5. Compute the balanced (weighted) odds ratio for each covariate.
   6. Plot unbalanced vs. balanced odds ratios.
******************************************************************************/
options nofmterr;

/* Change the location here. The library and every ODS output file below are
   built from this one macro variable, so this is the only place to edit.    */
%let project_dir = C:\Users\dsengupta\Documents\Courses\HAP 835\citalopram;

libname mylib "&project_dir";

/* Contents of the imported data, with the position of the variables in the
   data set */
proc contents data=mylib.citalopram position;
run;

/* Data preparation */
/******************************************************************************
 1. Keep the variables of interest: diagnosis of panic disorder
    (Panic_Diagnosis) is the treatment, citalopram success (CIT_Success) is
    the outcome.
 2. Deidentified_Patient_ID is renamed patient_id.
 3. There are 11 covariates that we want to balance. For each nominal (1/0)
    covariate we create a new one-letter variable: if the original variable
    is 1 the new variable holds the letter assigned below, otherwise the new
    variable is blank (missing).
 4. For convenience the new variable has the name of the (renamed) old
    variable with an underscore appended, e.g. race -> race_.

      gender                          used as is
      race                            race_            "W" if race="White", else "N"
      Family_Hx_Depression            depression_      "E"   (renamed depression)
      Generalized_Anxiety_Diagnosis   anxiety_         "G"   (renamed anxiety)
      Social_Phobia_Diagnosis         phobia_          "P"   (renamed phobia)
      Obsessive_Compulsive_Diagnosis  OCD_             "O"   (renamed OCD)
      Alcohol_Abuse_Diagnosis         alcohol_         "A"   (renamed alcohol)
      Drug_Abuse_Diagnosis            drug_            "D"   (renamed drug)
      Hypochondriasis_Diagnosis       hypochondriasis_ "H"   (renamed hypochondriasis)
      Somatoform_Diagnosis            somatoform_      "S"   (renamed somatoform)
      Bulimia_Nervosa_Diagnosis       bulimia_         "B"   (renamed bulimia)

 5. Create strata by concatenating gender and the new variables.
******************************************************************************/
data citalopram;
    set mylib.citalopram(keep=Deidentified_Patient_ID gender race
                              Family_Hx_Depression
                              Generalized_Anxiety_Diagnosis
                              Panic_Diagnosis
                              Social_Phobia_Diagnosis
                              Obsessive_Compulsive_Diagnosis
                              Alcohol_Abuse_Diagnosis
                              Drug_Abuse_Diagnosis
                              Hypochondriasis_Diagnosis
                              Somatoform_Diagnosis
                              Bulimia_Nervosa_Diagnosis
                              CIT_Success);

    /* NOTE(review): any value other than "White", including a blank race,
       is coded "N" - confirm that is intended for missing race. */
    if race="White" then race_="W";
    else race_="N";

    if Family_Hx_Depression=1 then depression_="E";
    else depression_="";
    if Generalized_Anxiety_Diagnosis=1 then anxiety_="G";
    else anxiety_="";
    if Social_Phobia_Diagnosis=1 then phobia_="P";
    else phobia_="";
    if Obsessive_Compulsive_Diagnosis=1 then OCD_="O";
    else OCD_="";
    if Alcohol_Abuse_Diagnosis=1 then alcohol_="A";
    else alcohol_="";
    if Drug_Abuse_Diagnosis=1 then drug_="D";
    else drug_="";
    if Hypochondriasis_Diagnosis=1 then hypochondriasis_="H";
    else hypochondriasis_="";
    if Somatoform_Diagnosis=1 then somatoform_="S";
    else somatoform_="";
    if Bulimia_Nervosa_Diagnosis=1 then bulimia_="B";
    else bulimia_="";

    /* RENAME takes effect on the output data set; inside this step the
       original names are still the ones in use. */
    rename Panic_Diagnosis=treatment
           CIT_Success=outcome
           deidentified_Patient_ID=patient_id
           Family_Hx_Depression=depression
           Generalized_Anxiety_Diagnosis=anxiety
           Social_Phobia_Diagnosis=phobia
           Obsessive_Compulsive_Diagnosis=OCD
           Alcohol_Abuse_Diagnosis=alcohol
           Drug_Abuse_Diagnosis=drug
           Hypochondriasis_Diagnosis=hypochondriasis
           Somatoform_Diagnosis=somatoform
           Bulimia_Nervosa_Diagnosis=bulimia;

    /* CATS strips blanks, so absent conditions contribute nothing and the
       stratum reads as gender + race code + letters of present conditions. */
    strata=CATS(gender,race_,depression_,anxiety_,phobia_,OCD_,alcohol_,drug_,
                hypochondriasis_,somatoform_,bulimia_);
run;

/* Format the values of treatment and outcome */
proc format;
    value treatment
        1="Case"
        0="Control"
    ;
    value outcome
        1="Success"
        0="Failure"
    ;
run;

/* Show the data in a table breaking down strata by treatment and outcome */
ods listing close;
ods rtf file="&project_dir.\Citalopram Table.rtf" style=minimal;

/* Legend for the letters that make up a stratum. Phobia is coded "P" in the
   data step above ("S" is somatoform). */
title  j=l "Gender (Female=F, Male=M)" j=r "race_(White=W, other race=N)";
title2 j=l "depression_(E)" j=r "anxiety_(G)";
title3 j=l "Phobia_(P)" j=r "OCD_(O)";
title4 j=l "alcohol_(A)" j=r "drug_(D)";
title5 j=l "Hypochondriasis_(H)" j=r "Somatoform_(S)";
title6 j=l "Bulimia_ (B)";

proc report data=citalopram nowd;
    where treatment ne .;
    column strata (treatment, outcome) total_case total_control;
    define strata        / group width=20;
    /* Descending internal order puts treatment=1 (Case) in the first pair of
       columns regardless of the row order of the input, which is what the
       _C2_.._C5_ references in the compute blocks below rely on. */
    define treatment     / across "" width=20 order=internal descending;
    define outcome       / "" across width=20 order=data;
    define total_case    / computed;
    define total_control / computed;

    /* SUM() ignores missing cells, so a stratum with an empty Success or
       Failure cell still gets a total instead of a missing value. */
    compute total_case;
        total_case = sum(_C2_, _C3_, 0);
    endcomp;
    compute total_control;
        total_control = sum(_C4_, _C5_, 0);
    endcomp;

    format treatment treatment. outcome outcome.;
run;

ods rtf close;
ods listing;

/* Create a table of unbalanced odds ratios and their confidence limits */

/* Start from an empty accumulator; NOWARN keeps the log clean on the first
   run, when the table does not exist yet. */
proc datasets library=work nolist nowarn;
    delete unbalanced_OR / memtype=data;
quit;

/* Unbalanced_OR: odds ratio of treatment by one covariate on the raw data.
   covariate - name of a variable in work.citalopram.
   Appends one row (covariate_name, Unbalanced_OR, Unbalanced_UCL,
   Unbalanced_LCL) to work.unbalanced_OR. */
%macro Unbalanced_OR(covariate);
    /* Descending sort + ORDER=DATA below puts the highest treatment and
       covariate values in the first row/column of the 2x2 table. */
    proc sort data=citalopram;
        by descending treatment descending &covariate;
    run;

    Title "Table of presence of Panic disorder (treatment group) by &covariate (UNBALANCED DATA)";
    proc freq data=citalopram order=data;
        tables treatment*&covariate / relrisk;
        output relrisk out=ub_&covariate._;
    run;

    data ub_&covariate(drop=_rror_ l_rror u_rror);
        length covariate_name $15.;
        set ub_&covariate._(keep=_rror_ l_rror u_rror);
        covariate_name="&covariate";
        Unbalanced_OR=round(_rror_,0.01);
        Unbalanced_UCL=round(u_rror,0.01);
        Unbalanced_LCL=round(l_rror,0.01);
    run;

    proc append base=unbalanced_OR data=ub_&covariate;
    run;
%mend unbalanced_or;

%unbalanced_or(gender);
%unbalanced_or(race_);
%unbalanced_or(Depression );
%unbalanced_or(Anxiety);
%unbalanced_or(Phobia);
%unbalanced_or(OCD);
%unbalanced_or(Alcohol);
%unbalanced_or(Drug);
%unbalanced_or(Hypochondriasis);
%unbalanced_or(Somatoform);
%unbalanced_or(Bulimia);

/* Create the weights table */
proc freq data=citalopram;
    where treatment ne .; /* exclude any observation where treatment was missing */
    tables strata*treatment / out=count;
run;

/* Transform the long data to wide form: one row per stratum */
proc transpose data=count out=trans_count(drop=_Name_ _label_) prefix=Count_treatment;
    by strata;
    ID treatment;
    var count;
    label count_treatment0="count_controls"
          count_treatment1="count_cases"
    ;
run;

data weight;
    set trans_count;
    /* delete the stratum if it has no cases or no controls */
    if count_treatment1 =. or count_treatment0=. then delete;
    weight=count_treatment1/count_treatment0;
run;

/* Alternate code in SQL to create the weight table */
proc sql noprint;
    create table count_case as
        select strata,
               count(treatment) as count_treatment1
        from citalopram
        where treatment=1
        group by strata;

    create table count_control as
        select strata,
               count(treatment) as count_treatment0
        from citalopram
        where treatment=0
        group by strata;

    /* Inner join keeps only strata that have both cases and controls.
       Columns are listed explicitly so strata is selected once. */
    create table sql_weight as
        select cs.strata,
               cs.count_treatment1,
               ct.count_treatment0,
               cs.count_treatment1/ct.count_treatment0 as weight
        from count_case as cs
        inner join count_control as ct
            on cs.strata=ct.strata;
quit;
/* End of alternate SQL code */

ods rtf file="&project_dir.\Citalopram weight Table.rtf" style=minimal;
title j=l "Calculating weights of the controls by dividing the total number of cases by total number of controls in each strata";
proc print data=weight noobs;
run;
ods rtf close;

options mprint symbolgen;

/* Merge the original data set citalopram with the data set weight by strata.
   Both inputs must be sorted by strata first. */
proc sort data=citalopram;
    by strata;
run;

proc sort data=weight;
    by strata;
run;

data citalopram_weight;
    merge citalopram(in=a) weight(keep=strata weight in=b);
    by strata;
    /* keep only patients whose stratum has a weight */
    if a and b;
    /* Cases get a weight of 1; controls get the stratum weight calculated in
       the weight data set. */
    if treatment=0 then weighted_observation=weight;
    else if treatment=1 then weighted_observation=1;
    /* drop any observation left without a weight */
    if weighted_observation ne .;
run;

/* Start from an empty accumulator, as for the unbalanced table. */
proc datasets library=work nolist nowarn;
    delete balanced_OR / memtype=data;
quit;

/* balanced_OR: odds ratio of treatment by one covariate on the weighted data.
   covariate - name of a variable in work.citalopram_weight.
   Appends one row (covariate_name, balanced_OR, balanced_UCL, balanced_LCL)
   to work.balanced_OR. */
%macro balanced_OR(covariate);
    proc sort data=citalopram_weight;
        by descending treatment descending &covariate;
    run;

    Title "Table of presence of Panic disorder (treatment group) by &covariate (BALANCED DATA)";
    proc freq data=citalopram_weight order=data;
        tables treatment*&covariate / relrisk;
        weight weighted_observation; /* apply the weights when calculating the OR for balanced data */
        output relrisk out=b_&covariate._;
    run;

    data b_&covariate(drop=_rror_ l_rror u_rror);
        length covariate_name $15.;
        set b_&covariate._(keep=_rror_ l_rror u_rror);
        covariate_name="&covariate";
        balanced_OR=round(_rror_,0.01);
        balanced_UCL=round(u_rror,0.01);
        balanced_LCL=round(l_rror,0.01);
    run;

    proc append base=balanced_OR data=b_&covariate;
    run;
%mend balanced_or;

%balanced_or(gender);
%balanced_or(race_);
%balanced_or(Depression );
%balanced_or(Anxiety);
%balanced_or(Phobia);
%balanced_or(OCD);
%balanced_or(Alcohol);
%balanced_or(Drug);
%balanced_or(Hypochondriasis);
%balanced_or(Somatoform);
%balanced_or(Bulimia);

/* Put the balanced and unbalanced results side by side, one row per covariate */
proc sort data=balanced_or;
    by covariate_name;
run;

proc sort data=unbalanced_or;
    by covariate_name;
run;

data table_or;
    merge balanced_or unbalanced_or;
    by covariate_name;
run;

ods pdf file="&project_dir.\SCB Graph.pdf";
options nodate nonumber;
ods pdf nobookmarkgen;

* Series plot;
PROC SGPLOT DATA = table_or;
    SERIES X = covariate_name Y = unbalanced_OR /
        LEGENDLABEL = 'Unbalanced Data' MARKERS LINEATTRS = (THICKNESS = 2);
    SERIES X = covariate_name Y = balanced_OR /
        LEGENDLABEL = 'Balanced Data' MARKERS LINEATTRS = (THICKNESS = 2);
    XAXIS label ="Covariate Name" TYPE = DISCRETE GRID;
    YAXIS LABEL = "ODDS Ratio" GRID VALUES = (0 TO 10 BY 1);
    TITLE 'Graphical Representation of Results of Stratified Covariate Balancing';
RUN;

ods pdf close;