# Create grid for prediction attach(syndata) row=50 col=50 syngrid=as.data.frame(expand.grid(Xcoord=seq(from=min(Xcoord),to=max(Xcoord),len=col),Ycoord=seq(from=min(Ycoord),to=max(Ycoord),len=row))) gridlen=length(syngrid[,1]) # Initial crude model (without covariate of age) m1=gam(case~lo(Xcoord,Ycoord,span=0.5), family=binomial(logit), data=syndata) # Use initial crude model to find optimal span step.gam(m1, scope=list("loc"=~lo(Xcoord, Ycoord, span=0.05) + lo(Xcoord, Ycoord, span=0.10) + lo(Xcoord, Ycoord, span=0.15) + lo(Xcoord, Ycoord, span=0.20) + lo(Xcoord, Ycoord, span=0.25) + lo(Xcoord, Ycoord, span=0.30) + lo(Xcoord, Ycoord, span=0.35) + lo(Xcoord, Ycoord, span=0.40) + lo(Xcoord, Ycoord, span=0.45) + lo(Xcoord, Ycoord, span=0.50) + lo(Xcoord, Ycoord, span=0.55) + lo(Xcoord, Ycoord, span=0.60) + lo(Xcoord, Ycoord, span=0.65) + lo(Xcoord, Ycoord, span=0.70) + lo(Xcoord, Ycoord, span=0.75) + lo(Xcoord, Ycoord, span=0.80) + lo(Xcoord, Ycoord, span=0.85) + lo(Xcoord, Ycoord, span=0.90) + lo(Xcoord, Ycoord, span=0.95)), family=binomial(logit), data=syndata) # Expected step.gam output #Start: case ~ lo(Xcoord, Ycoord, span = 0.5); AIC= 2434.577 #Trial: case ~ lo(Xcoord, Ycoord, span = 0.45); AIC= 2434.331 #Trial: case ~ lo(Xcoord, Ycoord, span = 0.55); AIC= 2435.39 #Step : case ~ lo(Xcoord, Ycoord, span = 0.45) ; AIC= 2434.331 # #Trial: case ~ lo(Xcoord, Ycoord, span = 0.4); AIC= 2434.376 #Call: #gam(formula = case ~ lo(Xcoord, Ycoord, span = 0.45), family = ..1, data = ..2, trace = F) # #Degrees of Freedom: 2000 total; 1989.59 Residual #Residual Deviance: 2413.51 # Crude model using optimal span m.45=gam(case~lo(Xcoord,Ycoord,span=0.45), family=binomial(logit), data=syndata) # Crude null model without smooth term for location m.0=gam(case~1, family=binomial(logit), data=syndata) # Convert from log odds to odds ratios (ORs) using the whole study area as the reference, dividing the odds at each grid point by the odds calculated by the reduced model omitting the location smoothing term. synoutput=syngrid[,1:2] # create output data frame synrefnull=mean(predict.gam(m.0)) # log odds from model without location synoutput[,3]=exp(predict.gam(m.45,syngrid)-synrefnull) # converting log odds to odds ratios # S-Plus provides an approximate p-value for globat test for significance of location based on the assumption of a chi-square distribution for the difference in deviances splusglobstat=(data.frame(anova(m.0, m.45, test="Chi"))[2,7]) # Because the chi-square assumption is only approximate for GAMs and may not work well in all situations, we estimate the distribution of the statistic under the null hypothesis using a permutation test # We examine pointwise departures from the null hypothesis of a flat surface using the same set of permutations we use for calculating the global statistics ID=length(case) coords=syndata[,3:4] m.data=syndata permresults=matrix(ncol=1000, nrow=gridlen, 0) permresults[,1]=predict.gam(m.45,syngrid) permrank=matrix(ncol=1000, nrow=gridlen, 0) devrank=matrix(ncol=1, nrow=1000, 0) devdata=data.frame(anova(m.0, m.45, test="Chi")) devrank[1,1]=devdata[2,6] i_2 j_2 while (j<1001) { index_sample(ID, replace=F) m.data[,3:4]_coords[index,] # randomly reassign individuals to the eligible residences # For each permutation, we run the GAM using the optimal span of the original data m.gam_gam(case ~ lo(Xcoord, Ycoord, 0.45), family = binomial(logit), data = m.data) permresults[,i]_predict.gam(m.gam, syngrid) # create a distribution of log odds for each point devdata_data.frame(anova(m.0, m.gam, test="Chi")) devrank[i,1]_devdata[2,6] # compute the deviance statistic for the model generated from the permuted data i_i+1 if (j%%100==0) print(i) j_j+1 } # p-value for deviance global statistic tempdev_rank(devrank) devglobstat_(1000-tempdev[1])/1000 # Areas of significantly decreased and increased risk include all points that rank in the lower 2.5% and upper 2.5% of the pointwise distributions k _ 1 while (k<=gridlen) { permrank[k,]_rank(permresults[k,]) k _ k + 1 } synoutput[,4]_permrank[,1] # rank at each point of the log odds for original data in the distribution of log odds generated from permuted data # synoutput can now be mapped