# Create grid for prediction
attach(syndata)
row=50
col=50
syngrid=as.data.frame(expand.grid(Xcoord=seq(from=min(Xcoord),to=max(Xcoord),len=col),Ycoord=seq(from=min(Ycoord),to=max(Ycoord),len=row)))
gridlen=length(syngrid[,1])

# Initial crude model (without covariate of age) 
m1=gam(case~lo(Xcoord,Ycoord,span=0.5), family=binomial(logit), data=syndata)

# Use initial crude model to find optimal span
step.gam(m1, scope=list("loc"=~lo(Xcoord, Ycoord, span=0.05) + lo(Xcoord, Ycoord, span=0.10) + lo(Xcoord, Ycoord, span=0.15) + lo(Xcoord, Ycoord, span=0.20) + lo(Xcoord, Ycoord, span=0.25) + lo(Xcoord, Ycoord, span=0.30) + lo(Xcoord, Ycoord, span=0.35) + lo(Xcoord, Ycoord, span=0.40) + lo(Xcoord, Ycoord, span=0.45) + lo(Xcoord, Ycoord, span=0.50) + lo(Xcoord, Ycoord, span=0.55) + lo(Xcoord, Ycoord, span=0.60) + lo(Xcoord, Ycoord, span=0.65) + lo(Xcoord, Ycoord, span=0.70) + lo(Xcoord, Ycoord, span=0.75) + lo(Xcoord, Ycoord, span=0.80) +  lo(Xcoord, Ycoord, span=0.85) + lo(Xcoord, Ycoord, span=0.90) + lo(Xcoord, Ycoord, span=0.95)), family=binomial(logit), data=syndata)

# Expected step.gam output
#Start:  case ~ lo(Xcoord, Ycoord, span = 0.5); AIC= 2434.577 
#Trial:  case ~  lo(Xcoord, Ycoord, span = 0.45); AIC= 2434.331 
#Trial:  case ~  lo(Xcoord, Ycoord, span = 0.55); AIC= 2435.39 
#Step :  case ~ lo(Xcoord, Ycoord, span = 0.45) ; AIC= 2434.331 
#
#Trial:  case ~  lo(Xcoord, Ycoord, span = 0.4); AIC= 2434.376 
#Call:
#gam(formula = case ~ lo(Xcoord, Ycoord, span = 0.45), family = ..1, data = ..2, trace = F)
#
#Degrees of Freedom: 2000 total; 1989.59 Residual
#Residual Deviance: 2413.51


# Crude model using optimal span
m.45=gam(case~lo(Xcoord,Ycoord,span=0.45), family=binomial(logit), data=syndata)

# Crude null model without smooth term for location
m.0=gam(case~1, family=binomial(logit), data=syndata)

# Convert from log odds to odds ratios (ORs) using the whole study area as the reference, dividing the odds at each grid point by the odds calculated by the reduced model omitting the location smoothing term.  
synoutput=syngrid[,1:2]                                        	# create output data frame
synrefnull=mean(predict.gam(m.0))				# log odds from model without location		
synoutput[,3]=exp(predict.gam(m.45,syngrid)-synrefnull)		# converting log odds to odds ratios


# S-Plus provides an approximate p-value for globat test for significance of location based on the assumption of a chi-square distribution for the difference in deviances
splusglobstat=(data.frame(anova(m.0, m.45, test="Chi"))[2,7])

# Because the chi-square assumption is only approximate for GAMs and may not work well in all situations, we estimate the distribution of the statistic under the null hypothesis using a permutation test
# We examine pointwise departures from the null hypothesis of a flat surface using the same set of permutations we use for calculating the global statistics  
ID=length(case)				
coords=syndata[,3:4]  			
m.data=syndata
permresults=matrix(ncol=1000, nrow=gridlen, 0)
permresults[,1]=predict.gam(m.45,syngrid)
permrank=matrix(ncol=1000, nrow=gridlen, 0)

devrank=matrix(ncol=1, nrow=1000, 0)
devdata=data.frame(anova(m.0, m.45, test="Chi"))
devrank[1,1]=devdata[2,6]

i_2
 
j_2
while (j<1001) {
 index_sample(ID, replace=F)
 m.data[,3:4]_coords[index,]					# randomly reassign individuals to the eligible residences
 # For each permutation, we run the GAM using the optimal span of the original data
 m.gam_gam(case ~ lo(Xcoord, Ycoord, 0.45), family = binomial(logit), data = m.data)
 permresults[,i]_predict.gam(m.gam, syngrid)			# create a distribution of log odds for each point
 devdata_data.frame(anova(m.0, m.gam, test="Chi"))
 devrank[i,1]_devdata[2,6]					# compute the deviance statistic for the model generated from the permuted data
 i_i+1
 if (j%%100==0) print(i)
 j_j+1
 }

# p-value for deviance global statistic
tempdev_rank(devrank)
devglobstat_(1000-tempdev[1])/1000			

# Areas of significantly decreased and increased risk include all points that rank in the lower 2.5% and upper 2.5% of the pointwise distributions  
k _ 1
while (k<=gridlen) {
 permrank[k,]_rank(permresults[k,])
 k _ k + 1 }
synoutput[,4]_permrank[,1]					# rank at each point of the log odds for original data in the distribution of log odds generated from permuted data 

# synoutput can now be mapped