Adam M. Wilson
r format(Sys.time(), "%B %d, %Y")
Load the model data we made in DataPrep.R
load("data/modeldata_annual.Rdata")
rv_meta=read.csv("data/vegtypecodes.csv")
sdat$vegn=rv_meta$code[match(sdat$veg,rv_meta$ID)]
rownames(sdat)=sdat$id
## clip tdat to include only 1984:2006 due to missing recent fire data
tdat=tdat[tdat$year%in%1984:2006,]We know have two data frames we'll use in the modeling. The first is the spatial data:
kable(head(sdat),row.names=F)| id | x | y | veg | cover | tmax | tmin | janrad | julrad | aspect | dem | tpi | firecount | vegn |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 83925 | 260445 | 6243525 | 18 | 1 | 28.19 | 9.413 | 8902 | 3029 | 2.135 | 152.5 | 7.934 | 1 | Peninsula Shale Renosterveld |
| 84598 | 260415 | 6243495 | 18 | 1 | 28.40 | 9.556 | 8781 | 2562 | 3.223 | 150.5 | 7.260 | 1 | Peninsula Shale Renosterveld |
| 84599 | 260445 | 6243495 | 18 | 1 | 28.19 | 9.459 | 8789 | 2686 | 2.612 | 149.6 | 6.183 | 1 | Peninsula Shale Renosterveld |
| 84600 | 260475 | 6243495 | 18 | 1 | 28.52 | 9.779 | 8515 | 2454 | 2.170 | 146.9 | 5.759 | 1 | Peninsula Shale Renosterveld |
| 84601 | 260505 | 6243495 | 18 | 1 | 28.79 | 10.022 | 8331 | 2112 | 2.421 | 138.2 | 5.770 | 1 | Peninsula Shale Renosterveld |
| 85271 | 260385 | 6243465 | 18 | 1 | 28.50 | 9.714 | 8619 | 2274 | 2.930 | 139.8 | 5.918 | 3 | Peninsula Shale Renosterveld |
And the second is the temporal data:
kable(head(tdat),row.names=F)| id | year | age | ndvi |
|---|---|---|---|
| 83925 | 1984 | -23 | 0.2790 |
| 83925 | 1985 | -24 | 0.5820 |
| 83925 | 1986 | -25 | 0.4830 |
| 83925 | 1987 | -26 | 0.3160 |
| 83925 | 1988 | -27 | 0.3080 |
| 83925 | 1989 | -28 | 0.4155 |
## subset to only two fynbos veg types
n=500
sid=c(
sample(which(sdat$vegn=="Peninsula Sandstone Fynbos"),n),
sample(which(sdat$vegn=="Peninsula Granite Fynbos - North"),n),
sample(which(sdat$vegn=="Hangklip Sand Fynbos"),n)
)
sdat=sdat[sid,]
#drop obs of unknown age or ndvi or with no data in sdat
tdat=tdat[!is.na(tdat$age)&
!is.na(tdat$ndvi)&
tdat$id%in%sdat$id
,]
## drop negative ages (time before first fire) for now
tdat=tdat[tdat$age>=0,]#### Set model name for naming objects below and create directory to hold output
mname=substr(
system(" git log --pretty=format:%H | head -n 1",intern=T),
1,8)
mname="v1"
if(!file.exists(paste("output/",mname,sep=""))) dir.create(paste("output/",mname,sep=""),recursive=T)
### subset dataset
holdout=0.50 #percent to hold out for validation
s=sort(sample(unique(sdat$id),round(length(unique(sdat$id))*(1-holdout)))); length(s)## [1] 750
write.csv(s,paste("output/",mname,"/",mname,"_subset.csv",sep=""),row.names=F)
sdat$subset=factor(ifelse(sdat$id%in%unique(tdat$id),ifelse(sdat$id%in%s,"Model Fitting","Validation"),"Prediction"),levels=c("Model Fitting","Validation","Prediction"),ordered=T)sdat$veg=as.factor(sdat$veg)
lm1=lm(dem~veg,data=sdat)
tveg=model.matrix(lm1)[,-1]
kable(head(tveg))| veg14 | veg16 | |
|---|---|---|
| 714121 | 0 | 1 |
| 713339 | 0 | 1 |
| 975631 | 0 | 1 |
| 743051 | 0 | 1 |
| 272501 | 0 | 1 |
| 930558 | 0 | 1 |
## Select and scale environmental data
envars=c("dem","tpi","tmax","tmin")
scaled=scale(as.matrix(sdat[,envars]))
env_full=cbind(intercept=1,scaled,tveg)
### Save the scaling parameters to convert fitted coefficients back to metric units later
beta.mu=c(intercept=0,attr(scaled,"scaled:center"),
rep(0,ncol(tveg)))
beta.sd=c(intercept=1,attr(scaled,"scaled:scale"),rep(1,ncol(tveg)))
rm(scaled) #drop the scaled datatdat_full=tdat
tdat=tdat[tdat$id%in%s,]; gc() ## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 1250380 66.8 2251281 120.3 2251281 120.3
## Vcells 1642380 12.6 69325578 529.0 108189981 825.5
## create two env frames for fitting and prediction
env=env_full[rownames(env_full)%in%s,]
### Drop missing values
omit=unique(tdat$id)[as.numeric(which(is.na(apply(env,1,sum))))]; omit## numeric(0)
if(length(omit)>0){
env=env[!rownames(env)%in%omit,]
tdat=tdat[!tdat$id%in%omit,]
}
## create new id that goes from 1 to nGrid
tdat$id2=as.integer(as.factor(tdat$id)); gc()## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 1250482 66.8 2251281 120.3 2251281 120.3
## Vcells 1654189 12.7 55460462 423.2 108189981 825.5
## Get counts
nGrid=length(unique(tdat$id)) ;nGrid## [1] 727
nTime=length(unique(tdat$year)) ;nTime## [1] 23
nBeta=ncol(env) ;nBeta## [1] 7
## Write data object
data=list(
age=tdat$age,
ndvi=tdat$ndvi,
id=tdat$id2,
nObs=nrow(tdat),
env=env,
nGrid=nGrid,
nBeta=nBeta
)
## Function to generate initial values
gen.inits=function(nGrid,nBeta) { list(
## spatial terms
alpha=runif(nGrid,0.1,0.5),
gamma=runif(nGrid,0.1,.9),
lambda=runif(nGrid,0.2,1),
## spatial means
alpha.mu=runif(1,0.1,0.2),
## priors
gamma.beta=runif(nBeta,0,1),
gamma.tau=runif(1,1,5),
alpha.tau=runif(1,1,5),
lambda.beta=runif(nBeta,0,2),
lambda.tau=runif(1,0,2),
tau=runif(1,0,2)
)
}
## list of parameters to monitor (save)
params=c("gamma.beta","gamma.sigma","alpha",
"alpha.mu","alpha.sigma","lambda.beta","lambda.sigma")
### Save all data into Rdata object for model fitting
save(data,gen.inits,s,sdat,beta.mu,beta.sd,envars,env_full,tdat_full,
file=paste("output/",mname,"/",mname,"_inputdata.Rdata",sep="")) foutput=paste0("output/",mname,"/",mname,"_modeloutput.Rdata")
if(!file.exists(foutput)){
write.table(paste("Starting model ",mname," on ",date()),
paste0("output/",mname,"ModelStart.txt"))
## test compilation
t1=system.time(m <<- jags.model(file="workflow/6_Modeling/Model.R",
data=data,
inits=gen.inits(data$nGrid,data$nBeta),
n.chains=3,n.adapt=10000))
t2=system.time(mc <<- coda.samples(m,params,n.iter=10000))
save(m,mc,beta.mu,beta.sd,file=foutput)
}Model Summaries
if(!exists("mc")) load(foutput)
## Potentially thin the data
mc2=window(mc,thin=20,start=1)## Warning: start value not changed
## Warning: start value not changed
## Warning: start value not changed
### Extract regression coefficients
mc_reg=mc2[,grep("gamma[.]|lambda[.]",colnames(mc[[1]]))]
xyplot(mc_reg)densityplot(mc_reg)### Calculate convergence metrics
## Autocorrelation
ac=melt(autocorr.diag(mc_reg,lags=seq(0,200,5)));
ac$Var1=as.numeric(do.call(rbind,strsplit(as.character(ac$Var1)," "))[,2])
colnames(ac)=c("Lag","Parameter","value")Summarize parameter values:
kable(summary(mc_reg)[[2]])| 2.5% | 25% | 50% | 75% | 97.5% | |
|---|---|---|---|---|---|
| gamma.beta[1] | -3.3625 | -3.1954 | -3.0960 | -2.9973 | -2.8201 |
| gamma.beta[2] | -0.2023 | -0.0228 | 0.0748 | 0.1713 | 0.3653 |
| gamma.beta[3] | -0.0570 | 0.0196 | 0.0559 | 0.0977 | 0.1732 |
| gamma.beta[4] | -0.0902 | 0.0557 | 0.1318 | 0.2106 | 0.3709 |
| gamma.beta[5] | -0.3468 | -0.1910 | -0.0916 | -0.0009 | 0.1763 |
| gamma.beta[6] | -0.3256 | -0.1014 | 0.0211 | 0.1432 | 0.3409 |
| gamma.beta[7] | 0.4433 | 0.6434 | 0.7532 | 0.8600 | 1.0774 |
| gamma.sigma | 0.4803 | 0.5486 | 0.5855 | 0.6232 | 0.6972 |
| lambda.beta[1] | 0.6736 | 1.2907 | 1.7154 | 2.0810 | 2.6778 |
| lambda.beta[2] | -1.8408 | -0.6153 | -0.1719 | 0.3561 | 1.1372 |
| lambda.beta[3] | -0.1572 | 0.2119 | 0.4362 | 0.6770 | 1.1171 |
| lambda.beta[4] | -0.3089 | 0.6161 | 1.0954 | 1.6482 | 2.8175 |
| lambda.beta[5] | -3.0966 | -1.8248 | -1.2857 | -0.8328 | -0.2008 |
| lambda.beta[6] | -4.0651 | -3.1174 | -2.6702 | -2.2501 | -1.4741 |
| lambda.beta[7] | -2.6411 | -1.8216 | -1.3458 | -0.8684 | 0.0754 |
| lambda.sigma | 1.5039 | 1.6889 | 1.8006 | 1.9326 | 2.2041 |
names(beta.mu)[1] "intercept" "dem" "tpi" "tmax" "tmin" ""
[7] ""

