# This code was prepared by Professor Alemi and Elrafey # based on data from Agreseti # To begin with read the data # # # Orth Rehab Sev SNF Cost Observed # Joe Yes High A Above 405 # Joe Yes High A Below 268 # Joe Yes High B Above 453 # Joe Yes High B Below 228 # Joe Yes Low A Above 23 # Joe Yes Low A Below 23 # Joe Yes Low B Above 30 # Joe Yes Low B Below 19 # Joe No High A Above 13 # Joe No High A Below 218 # Joe No High B Above 28 # Joe No High B Below 201 # Joe No Low A Above 2 # Joe No Low A Below 19 # Joe No Low B Above 1 # Joe No Low B Below 18 # Jim Yes High A Above 1 # Jim Yes High A Below 17 # Jim Yes High B Above 1 # Jim Yes High B Below 17 # Jim Yes Low A Above 0 # Jim Yes Low A Below 1 # Jim Yes Low B Above 1 # Jim Yes Low B Below 8 # Jim No High A Above 1 # Jim No High A Below 117 # Jim No High B Above 1 # Jim No High B Below 133 # Jim No Low A Above 0 # Jim No Low A Below 12 # Jim No Low B Above 0 # Jim No Low B Below 17 # # The data were put in a file called 5Way.csv in the working directory data = read.csv("5Way.csv") # Convert data to numeric for(i in 1:5){data[,i]=as.numeric(data[,i])} # Use log-linear modeling # The dependent variable is frequency of observations # Independent variables are main effects and pairwise variables model = glm(Observed ~ (Orth + Rehab + Sev + SNF + Cost)^2, data=t, family=poisson) summary(model) # This is the homogenous model. # Record the residual deviance and degrees of freedom. # Next progressively remove one interaction term to see the impact # Record the fit after each removal NoOR = update(model,.~.-(Orth:Rehab)) summary(NoOR) NoOA = update(model,.~.-(Orth:Cost)) summary(NoOA) NoRA = update(model,.~.-(Rehab:Cost)) summary(NoRA) NoOS = update(model,.~.-(Orth:Sev)) summary(NoOS) NoON = update(model,.~.-(Orth:SNF)) summary(NoON) NoRS = update(model,.~.-(Rehab:Sev)) summary(NoRS) NoAN = update(model,.~.-(SNF:Cost)) summary(NoAN) NoSA = update(model,.~.-(Sev:Cost)) summary(NoSA) NoRN = update(model,.~.-(Rehab:SNF)) summary(NoRN) # This is a summary of the various models and their performance # Model G2 Df # All 15.34 16 # -OR 201.20 17 # -OA 106.96 17 # -RA 513.47 17 # -OS 20.32 17 # -ON 18.72 17 # -RS 15.78 17 # -AN 25.16 17 # -SA 18.93 17 # -RN 16.32 17 # -RS-RN 16.74 18 # -RS-RN-ON 22.02 19 # -RS-RN-SA 19.09 19 # Select the model with the smallest increase in residual deviance. # This is the model with no RS, with residual deviance of 15.78 # It changes G2 by 15.78-15.34=.44. # This loss in fit is worth it for gain of 1 degree of freedom # Remove next RN, the next smallest increase in residual deviance NoRSRN = update(model,.~.-(Rehab:Sev+Rehab:SNF)) summary(NoRSRN) # This increases G2 by 0.95 for 1 additional degree of freedom # Still worth it # Check removal of another smallest increase in the residual deviance, ON NoRSRNON = update(model,.~.-(Rehab:Sev+Rehab:SNF+Orth:SNF)) summary(NoRSRNON) # The increase seemed a bit to high, 22.02. # An increase of 5.29 for 1 degree of freedom. # Seems too much. # Check the impact of removing a different association NoRSRNSA = update(model,.~.-(Rehab:Sev+Rehab:SNF+Sev:Cost)) summary(NoRSRNSA) # This is better at 19.09 # Change of 2.36 in G2 for gain of 1 degree of freedom # Stop model building # For each of the associations that remain # draw a line in between the two related nodes # This is a visual report of the associations you discovered # and associations you ruled out