AppPopnEco/CountAnalysis.R at master · quantitativeconservationlab/AppPopnEco · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
#######################################################################
#######################################################################
##     This script was created by Dr. Jen Cruz as part of            ##
##            the Applied Population Ecology Class                  ###
##                                                                   ##
## Here we import our cleaned data for a single year of point count  ##
#  observations for Piute ground squirrels at the NCA and run a      ##
## closed population N-mixture analysis. The model is hierarchical    #
#  with : (1) an ecological submodel linking abundance to             #
## environmental predictors at each site; (2) an observation submodel #
## linking detection probability to relevant predictors.             ##
##                                                                   ##
# Abundance is expected to be higher in sites with more sagebrush     #
# and lower in those with more cheatgrass.                            #                                        #
# Detection may be related to observer effects and to time of day     #
#######################################################################

##### Set up your workspace and load relevant packages -----------
# Clean your workspace to reset your R environment. #
rm( list = ls() )
# Check that you are in the right project folder
getwd()

#install relevant packages
install.packages( 'Rtools' )
install.packages( "nmixgof" ) #for evaluating N-mixture models
#load relevant packages
library( tidyverse )#includes dplyr, tidyr and ggplot2
library( unmarked ) #runs N-mixture models
library( MuMIn ) #calculates pseudo-R^2
library( AICcmodavg) #gof tests (Duarte et al. 2018)
library( nmixgof ) #more gof tests (Knape et al. 2018)
## end of package load ###############
###################################################################
#### Load or create data -----------------------------------------
# set directory where your data are:
datadir <- paste( getwd(), "/Data/", sep = "" )
# load cleaned data
closeddf <- read.csv( file = paste( datadir, "closed_counts.csv", sep = ""),
                      header = TRUE )
#view
head( closeddf ); dim( closeddf )
#### End of data load -------------
########### standardising functions  #################
#genetic function to standardise covariates using Gelman's suggestion:
standardise <- function( xmat, stdevs = 2, marg = c( 1, 2, 3 ) ) {
  mean.xmat = mean( as.vector( xmat ), na.rm = TRUE )
  sd.xmat = sd( as.vector( xmat ), na.rm = TRUE )
  std.xmat = apply( xmat, marg, function( x ){
    ( x - mean.xmat ) / (stdevs * sd.xmat ) } )
  return( std.xmat )
}
###################################################################

####################################################################
##### Ready data for analysis --------------
# Let's define our unmarked dataframe:
# Start by defining which columns represent the response (counts):
umf <- unmarkedFramePCount(
  y = as.matrix( closeddf[ ,c("count.j1", "count.j2", "count.j3")]),
  # Define predictors at the site level:
  siteCovs = closeddf[ ,c("sagebrush", "cheatgrass")],
  # Define predictors at the survey level as a list:
  obsCovs = list( obsv = closeddf[ ,c("observer.j1", "observer.j2", "observer.j3")],
  time = closeddf[ ,c('time.j1', 'time.j2', 'time.j3') ],
  time2 = cbind( (closeddf$time.j1)^2, (closeddf$time.j2)^2,
                                     (closeddf$time.j3)^2 ) ) )

# View summary of unmarked dataframe:
summary( umf )
#now scale ecological predictors:
#sc <- apply( siteCovs(umf), MARGIN = 2, FUN = scale )
scsage <- standardise( as.matrix(siteCovs(umf)[,"sagebrush"]), stdevs = 2, marg = 2 )
sccheat <- standardise( as.matrix(siteCovs(umf)[,"cheatgrass"]), stdevs = 2, marg = 2 )
# We replace the predictors in our unmarked dataframe with the scaled values:
siteCovs( umf )[,c("sagebrush","cheatgrass") ] <- cbind(scsage, sccheat)
#now for time:
sctime <- standardise( as.matrix( obsCovs(umf)[,c("time") ] ), marg = 2 )
sctime2 <- standardise( as.matrix( obsCovs(umf)[,c("time2") ] ), marg = 2 )
#replace with scaled values:
obsCovs(umf)[,c("time","time2") ] <-cbind(sctime, sctime2)

#recheck
summary( umf )
### end data prep -----------
### Analyze data ------------------------------------------
# We are now ready to perform our analysis. We start with a full model:
fm.closed <- pcount( ~ 1 + obsv + time + time2
                   ~ 1 + sagebrush + cheatgrass,
                   #Define the maximum possible abundance
                   #during the primary occasion
                    K = 80,
                   mixture = c("P"),
                   data = umf )
# Note that we start with the observation submodel #
#We then define the ecological submodel
# You should try alternative K values to make sure that your model
#isn't sensitive to the value you provided.

# View model results:
fm.closed

# Estimate confidence intervals:
confint( fm.closed, type = "state" )
# coefficients for detection submodel:
confint( fm.closed, type = 'det' )
#
# Based on the overlap of the 95% CIs for your predictor coefficients, #
# which may be important to each of your responses? #
# Answer:
#
#What is the mean abundance from our model?
exp(coef(fm.closed[1]))
plogis(coef(fm.closed[2] ) )
#############end full model ###########
##########################################################################
# Model fit and evaluation -----------------------------------------------
# We start with goodness of fit (GoF) outlined by Duarte et al. 2018 #
# Ecological modelling 374:51-59 and available via AICmodavg package #
# The Nmix.gof.test relies on a Pearson chi-square to assess the fit of #
# N-mixture models. The approach uses bootstrapping to estimate the p values #
# The test also estimates a c-hat measure of overdispersion, as the  #
# observed test statistic divided by the mean of the simulated test statistics #

# Let's compute observed chi-square, assess significance, and estimate c-hat
gof.boot <- Nmix.gof.test( fm.closed, nsim = 100,
                           print.table = TRUE )
#view
gof.boot
# What does the output tell us about our model fit?
# Answer:
#
# We also evaluate how well our full model did against the null model #
# by estimating pseudo-R^2, based on Nagelkerke, N.J.D. (2004) A Note #
# on a General Definition of the Coefficient of Determination. Biometrika 78,#
# pp. 691-692.#
# We run the null model
fm.null <- pcount( ~ 1 ~ 1,
            K = 80, data = umf )
#view
fm.null
# Now build the list with the two models of interest:
rms <- fitList( 'full' = fm.closed,
                'null' = fm.null )
# Then use model selection function from unmarked, defining which is the null:
unmarked::modSel(rms, nullmod = "null" )
# What does this tell us?
# Answer:
#
# Now use gof checks outlined in Knape et al. 2018 MEE 9:2102-2114
# We start by estimating overdispersion metrics
chat( fm.closed, type = 'marginal' )
chat( fm.closed, type = 'site-sum' )
chat( fm.closed, type = 'observation' )

# Plot rq residuals against untransformed numeric predictors. This may help
# detect problems with the functional form assumed in a model
residcov( fm.closed )
# What do the plots tell you?
# Answer:
#
# Plot residuals against fitted values. Site-sum randomized quantile residuals
# are used for site covariates while marginal residuals are used for
# observation covariates.
residfit( fm.closed, type = 'site-sum' )
# Plot the observation model residuals
residfit( fm.closed, type = 'observation' )
# What did Knape et al. 2018 say these residuals were useful for?
# Answer:
#

# Niw plot Qq plots of randomized residuals against standard normal quantiles. #
# Under a good fit, residuals should be close to the identity line.
residqq( fm.closed, type = 'site-sum' )
residqq( fm.closed, type = 'observation' )
# What do these plots indicate?
# Answer:
#
# What is some of the advice recommended by Knape et al. 2018 if we want
# to use abundance estimates from these N-mixture models?
# Answer:
#
# Now try fitting other functional forms (e.g. ZIP or Negative Binomial)
# Do you get a better fit?
# Answer:
#
# Are there other ways to fit time that may make more sense?
# Answer:
#

#########################################################################
##### Summarizing model output ##############
# Estimate partial prediction plots for predictors with 95% CIs not overlapping zero:
# Start by creating our datasets to predict over
# how many values do we use:
n <- 100
# what are the min max times:
closeddf %>% select( time.j1, time.j2, time.j3 ) %>%
  summarise_all(list(min, max) )
#use them to define your bounds:
Time <- round(seq( 0, 360, length.out = n ),0)
Time2 <- Time^2
time.std <- standardise( as.matrix(Time), marg = 2 )
time2.std <- standardise( as.matrix(Time2), marg=2 )
# now for detection
detData <- data.frame( obsv = factor(c("tech.1", "tech.1","tech.1", "tech.1"),
              levels = c("tech.1", "tech.2","tech.3", "tech.4") ),
              time = time.std, time2 = time2.std )
#predict partial relationship:
pred.time <- predict( fm.closed, type = "det", newdata = detData,
                           appendData = TRUE )

# create plot for detection submodel:
timep <- cbind( pred.time[,c("Predicted", "lower", "upper") ], Time ) %>%
  # define x and y values
  ggplot(., aes( x = Time, y = Predicted ) ) +
  #choose preset look
  theme_bw( base_size = 15 ) +
  # add labels
  labs( x = "Time (mins pass 6:00am)", y = "Probability of Detection" ) +
  # add band of confidence intervals
  geom_smooth( aes(ymin = lower, ymax = upper ),
               stat = "identity",
               size = 1.5, alpha = 0.5, color = "grey" ) +
  # add mean line on top
  geom_line( linewidth = 2 )
#view
timep

# choose one more predictor to plot marginalized effects for.
# Answer:

############################################################################
################## Save your data and workspace ###################
# Save workspace:
save.image( "CountResults.RData" )

########## End of saving section ##################################

############# END OF SCRIPT #####################################