# This code was prepared by Professor Alemi and Elrafey
# based on data from Agreseti

# To begin with read the data
#
# 
#	Orth	Rehab	Sev	SNF	Cost	Observed
#	Joe	Yes	High	A	Above	405
#	Joe	Yes	High	A	Below	268
#	Joe	Yes	High	B	Above	453
#	Joe	Yes	High	B	Below	228
#	Joe	Yes	Low	A	Above	23
#	Joe	Yes	Low	A	Below	23
#	Joe	Yes	Low	B	Above	30
#	Joe	Yes	Low	B	Below	19
#	Joe	No	High	A	Above	13
#	Joe	No	High	A	Below	218
#	Joe	No	High	B	Above	28
#	Joe	No	High	B	Below	201
#	Joe	No	Low	A	Above	2
#	Joe	No	Low	A	Below	19
#	Joe	No	Low	B	Above	1
#	Joe	No	Low	B	Below	18
#	Jim	Yes	High	A	Above	1
#	Jim	Yes	High	A	Below	17
#	Jim	Yes	High	B	Above	1
#	Jim	Yes	High	B	Below	17
#	Jim	Yes	Low	A	Above	0
#	Jim	Yes	Low	A	Below	1
#	Jim	Yes	Low	B	Above	1
#	Jim	Yes	Low	B	Below	8
#	Jim	No	High	A	Above	1
#	Jim	No	High	A	Below	117
#	Jim	No	High	B	Above	1
#	Jim	No	High	B	Below	133
#	Jim	No	Low	A	Above	0
#	Jim	No	Low	A	Below	12
#	Jim	No	Low	B	Above	0
#	Jim	No	Low	B	Below	17
#
# The data were put in a file called 5Way.csv in the working directory

data = read.csv("5Way.csv")

# Convert data to numeric 
 
for(i in 1:5){data[,i]=as.numeric(data[,i])}

# Use log-linear modeling
# The dependent variable is frequency of observations
# Independent variables are main effects and pairwise variables

model = glm(Observed ~ (Orth + Rehab + Sev + SNF + Cost)^2, data=t, family=poisson)
summary(model)

# This is the homogenous model.  
# Record the residual deviance and degrees of freedom.
# Next progressively remove one interaction term to see the impact
# Record the fit after each removal

NoOR = update(model,.~.-(Orth:Rehab))
summary(NoOR)

NoOA = update(model,.~.-(Orth:Cost))
summary(NoOA)

NoRA = update(model,.~.-(Rehab:Cost))
summary(NoRA)

NoOS = update(model,.~.-(Orth:Sev))
summary(NoOS)

NoON = update(model,.~.-(Orth:SNF))
summary(NoON)

NoRS = update(model,.~.-(Rehab:Sev))
summary(NoRS)

NoAN = update(model,.~.-(SNF:Cost))
summary(NoAN)

NoSA = update(model,.~.-(Sev:Cost))
summary(NoSA)

NoRN = update(model,.~.-(Rehab:SNF))
summary(NoRN)

# This is a summary of the various models and their performance
#	Model		G2		Df	
#	All 		15.34		16	
#	 -OR		201.20		17	
#	 -OA		106.96		17	
#	 -RA		513.47		17	
#	 -OS		20.32		17	
#	 -ON		18.72		17	
#	 -RS		15.78		17	
#	 -AN		25.16		17	
#	 -SA		18.93		17	
#	 -RN		16.32		17	
#	 -RS-RN		16.74		18	
#	 -RS-RN-ON	22.02		19	
#	 -RS-RN-SA	19.09		19	

# Select the model with the smallest increase in residual deviance.  
# This is the model with no RS, with residual deviance of 15.78 
# It changes G2 by 15.78-15.34=.44.
# This loss in fit is worth it for gain of 1 degree of freedom
# Remove next RN, the next smallest increase in residual deviance

NoRSRN = update(model,.~.-(Rehab:Sev+Rehab:SNF))
summary(NoRSRN)

# This increases G2 by 0.95 for 1 additional degree of freedom
# Still worth it
# Check removal of another smallest increase in the residual deviance, ON

NoRSRNON = update(model,.~.-(Rehab:Sev+Rehab:SNF+Orth:SNF))
summary(NoRSRNON)

# The increase seemed a bit to high, 22.02.
# An increase of 5.29 for 1 degree of freedom.
# Seems too much.
# Check the impact of removing a different association

NoRSRNSA = update(model,.~.-(Rehab:Sev+Rehab:SNF+Sev:Cost))
summary(NoRSRNSA)

# This is better at 19.09
# Change of 2.36 in G2 for gain of 1 degree of freedom
# Stop model building
# For each of the associations that remain 
# draw a line in between the two related nodes
# This is a visual report of the associations you discovered
# and associations you ruled out