% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/modelselection.R
\name{modelsearch}
\alias{modelsearch}
\title{Develop Best-fit Vital Rate Estimation Models For MPM Development}
\usage{
modelsearch(
  data,
  historical = TRUE,
  approach = "mixed",
  suite = "size",
  bestfit = "AICc&k",
  vitalrates = c("surv", "size", "fec"),
  surv = c("alive3", "alive2", "alive1"),
  obs = c("obsstatus3", "obsstatus2", "obsstatus1"),
  size = c("sizea3", "sizea2", "sizea1"),
  repst = c("repstatus3", "repstatus2", "repstatus1"),
  fec = c("feca3", "feca2", "feca1"),
  stage = c("stage3", "stage2", "stage1"),
  indiv = "individ",
  patch = NA,
  year = "year2",
  sizedist = "gaussian",
  fecdist = "gaussian",
  size.zero = FALSE,
  size.trunc = FALSE,
  fec.zero = FALSE,
  fec.trunc = FALSE,
  patch.as.random = TRUE,
  year.as.random = TRUE,
  juvestimate = NA,
  juvsize = FALSE,
  jsize.zero = FALSE,
  jsize.trunc = FALSE,
  fectime = 2,
  censor = NA,
  age = NA,
  indcova = NA,
  indcovb = NA,
  indcovc = NA,
  show.model.tables = TRUE,
  global.only = FALSE,
  quiet = FALSE
)
}
\arguments{
\item{data}{The vertical dataset to be used for analysis. This dataset should 
be of class \code{hfvdata}, but can also be a data frame formatted similarly
to the output format provided by functions \code{\link{verticalize3}()} or
\code{\link{historicalize3}()}, as long as all needed variables are properly
designated.}

\item{historical}{A logical variable denoting whether to assess the effects
of state in occasion \emph{t}-1 in addition to state in occasion \emph{t}.
Defaults to TRUE.}

\item{approach}{The statistical approach to be taken for model building. The 
default is \code{mixed}, which uses the mixed model approach utilized in 
packages \code{lme4} and \code{glmmTMB}. Other options include \code{glm},
which uses \code{lm}, \code{glm}, \code{glm.nb}, and related functions in
packages \code{MASS}, \code{stats}, and \code{pscl}.}

\item{suite}{This describes the global model for each vital rate estimation
and has the following possible values: \code{full}, includes main effects and
all two-way interactions of size and reproductive status; \code{main},
includes main effects only of size and reproductive status; \code{size},
includes only size (also interactions between size in historical model);
\code{rep}, includes only reproductive status (also interactions between
status in historical model); \code{cons}, all vital rates estimated only as
y-intercepts. If \code{approach = "glm"} and \code{year.as.random = FALSE},
then year is also included as a fixed effect, and, in the case of
\code{full}, included in two-way interactions. Defaults to \code{size}.}

\item{bestfit}{A variable indicating the model selection criterion for the
choice of best-fit model. The default is \code{AICc&k}, which chooses the 
best-fit model as the model with the lowest AICc or, if not the same model,
then the model that has the lowest degrees of freedom among models with
\eqn{\Delta AICc <= 2.0}. Alternatively, \code{AICc} may be chosen, in which
case the best-fit model is simply the model with the lowest AICc value.}

\item{vitalrates}{A vector describing which vital rates will be estimated via
linear modeling, with the following options: \code{surv}, survival
probability; \code{obs}, observation probability; \code{size}, overall size;
\code{repst}, probability of reproducing; and \code{fec}, amount of
reproduction (overall fecundity). Defaults to \code{c("surv", "size", "fec")}.}

\item{surv}{A vector indicating the variable names coding for status as alive
or dead in occasions \emph{t}+1, \emph{t}, and \emph{t}-1, respectively.
Defaults to \code{c("alive3", "alive2", "alive1")}.}

\item{obs}{A vector indicating the variable names coding for observation
status in occasions \emph{t}+1, \emph{t}, and \emph{t}-1, respectively.
Defaults to \code{c("obsstatus3", "obsstatus2", "obsstatus1")}.}

\item{size}{A vector indicating the variable names coding for size in
occasions \emph{t}+1, \emph{t}, and \emph{t}-1, respectively. Defaults to
\code{c("sizea3", "sizea2", "sizea1")}.}

\item{repst}{A vector indicating the variable names coding for reproductive
status in occasions \emph{t}+1, \emph{t}, and \emph{t}-1, respectively.
Defaults to \code{c("repstatus3", "repstatus2", "repstatus1")}.}

\item{fec}{A vector indicating the variable names coding for fecundity in
occasions \emph{t}+1, \emph{t}, and \emph{t}-1, respectively. Defaults to
\code{c("feca3", "feca2", "feca1")}.}

\item{stage}{A vector indicating the variables coding for stage in occasions
\emph{t}+1, \emph{t}, and \emph{t}-1. Defaults to \code{c("stage3", "stage2",
"stage1")}.}

\item{indiv}{A variable indicating the variable name coding individual
identity. Defaults to \code{individ}.}

\item{patch}{A variable indicating the variable name coding for patch, where
patches are defined as permanent subgroups within the study population.
Defaults to NA.}

\item{year}{A variable indicating the variable coding for observation
occasion \emph{t}. Defaults to \code{year2}.}

\item{sizedist}{The probability distribution used to model size. Options
include \code{gaussian} for the Normal distribution (default), \code{poisson}
for the Poisson distribution, and \code{negbin} for the negative binomial
distribution (quadratic parameterization).}

\item{fecdist}{The probability distribution used to model fecundity. Options
include \code{gaussian} for the Normal distribution (default), \code{poisson}
for the Poisson distribution, and \code{negbin} for the negative binomial
distribution (quadratic parameterization).}

\item{size.zero}{A logical variable indicating whether size distribution 
should be zero-inflated. Only applies to Poisson and negative binomial 
distributions. Defaults to FALSE.}

\item{size.trunc}{A logical variable indicating whether size distribution is
zero-truncated. Defaults to FALSE. Cannot be TRUE if \code{size.zero = TRUE}.}

\item{fec.zero}{A logical variable indicating whether fecundity distribution 
should be zero-inflated. Only applies to Poisson and negative binomial 
distributions. Defaults to FALSE.}

\item{fec.trunc}{A logical variable indicating whether fecundity distribution
is zero-truncated. Defaults to FALSE. Cannot be TRUE if
\code{fec.zero = TRUE}.}

\item{patch.as.random}{If set to TRUE and \code{approach = "lme4"}, then
\code{patch} is included as a random factor. If set to FALSE and
\code{approach = "glm"}, then \code{patch} is included as a fixed factor. All
other combinations of logical value and \code{approach} lead to \code{patch}
not being included in modeling. Defaults to TRUE.}

\item{year.as.random}{If set to TRUE and \code{approach = "lme4"}, then
\code{year} is included as a random factor. If set to FALSE, then \code{year}
is included as a fixed factor. All other combinations of logical value and
\code{approach} lead to \code{year} not being included in modeling. Defaults
to TRUE.}

\item{juvestimate}{An optional variable denoting the stage name of the
juvenile stage in the vertical dataset. If not NA, and \code{stage} is also
given (see below), then vital rates listed in \code{vitalrates} other than
\code{fec} will also be estimated from the juvenile stage to all adult
stages. Defaults to NA, in which case juvenile vital rates are not estimated.}

\item{juvsize}{A logical variable denoting whether size should be used as a
term in models involving transition from the juvenile stage. Defaults to
FALSE, and is only used if \code{juvestimate} does not equal NA.}

\item{jsize.zero}{A logical variable indicating whether size distribution 
of juveniles should be zero-inflated. Only applies to Poisson and negative
binomial distributions. Defaults to FALSE.}

\item{jsize.trunc}{A logical variable indicating whether size distribution in
juveniles is zero-truncated. Defaults to FALSE. Cannot be TRUE if
\code{jsize.zero = TRUE}.}

\item{fectime}{A variable indicating which year of fecundity to use as the
response term in fecundity models. Options include \code{2}, which refers to
occasion \emph{t}, and \code{3}, which refers to occasion \emph{t}+1.
Defaults to \code{2}.}

\item{censor}{A vector denoting the names of censoring variables in the
dataset, in order from occasion \emph{t}+1, followed by occasion \emph{t},
and lastly followed by occasion \emph{t}-1. Defaults to NA.}

\item{age}{Designates the name of the variable corresponding to age in the
vertical dataset. Defaults to NA, in which case age is not included in linear
models. Should only be used if building age x stage matrices.}

\item{indcova}{Vector designating the names in occasions \emph{t}+1,
\emph{t}, and \emph{t}-1 of an individual covariate. Defaults to NA.}

\item{indcovb}{Vector designating the names in occasions \emph{t}+1,
\emph{t}, and \emph{t}-1 of an individual covariate. Defaults to NA.}

\item{indcovc}{Vector designating the names in occasions \emph{t}+1,
\emph{t}, and \emph{t}-1 of an individual covariate. Defaults to NA.}

\item{show.model.tables}{If set to TRUE, then includes full modeling tables
in the output. Defaults to TRUE.}

\item{global.only}{If set to TRUE, then only global models will be built and
evaluated. Defaults to FALSE.}

\item{quiet}{If set to TRUE, then model building and selection will proceed
without warnings and diagnostic messages being issued. Note that this will
not affect warnings and messages generated as models themselves are tested.
Defaults to FALSE.}
}
\value{
This function yields an object of class \code{lefkoMod}, which is a
list in which the first 9 elements are the best-fit models for survival,
observation status, size, reproductive status, fecundity, juvenile survival,
juvenile observation, juvenile size, and juvenile transition to reproduction, 
respectively, followed by 9 elements corresponding to the model tables for
each of these vital rates, in order, followed by a single character element
denoting the criterion used for model selection, and ending on a quality
control vector:

\item{survival_model}{Best-fit model of the binomial probability of survival
from occasion \emph{t} to occasion \emph{t}+1. Defaults to 1.}
\item{observation_model}{Best-fit model of the binomial probability of 
observation in occasion \emph{t}+1 given survival to that occasion. Defaults
to 1.}
\item{size_model}{Best-fit model of size in occasion \emph{t}+1 given
survival to and observation in that occasion. Defaults to 1.}
\item{repstatus_model}{Best-fit model of the binomial probability of
reproduction in occasion \emph{t}+1, given survival to and observation in
that occasion. Defaults to 1.}
\item{fecundity_model}{Best-fit model of fecundity in occasion \emph{t}+1
given survival to, and observation and reproduction in that occasion.
Defaults to 1.}
\item{juv_survival_model}{Best-fit model of the binomial probability of
survival from occasion \emph{t} to occasion \emph{t}+1 of an immature
individual. Defaults to 1.}
\item{juv_observation_model}{Best-fit model of the binomial probability of 
observation in occasion \emph{t}+1 given survival to that occasion of an
immature individual. Defaults to 1.}
\item{juv_size_model}{Best-fit model of size in occasion \emph{t}+1 given
survival to and observation in that occasion of an immature individual.
Defaults to 1.}
\item{juv_reproduction_model}{Best-fit model of the binomial probability of
reproduction in occasion \emph{t}+1, given survival to and observation in
that occasion of an individual that was immature in occasion \emph{t}. This
model is technically not a model of reproduction probability for individuals
that are immature, rather reproduction probability here is given for
individuals that are mature in occasion \emph{t}+1 but immature in occasion
\emph{t}. Defaults to 1.}
\item{survival_table}{Full dredge model table of survival probability.}
\item{observation_table}{Full dredge model table of observation probability.}
\item{size_table}{Full dredge model table of size.}
\item{repstatus_table}{Full dredge model table of reproduction probability.}
\item{fecundity_table}{Full dredge model table of fecundity.}
\item{juv_survival_table}{Full dredge model table of immature survival 
probability.}
\item{juv_observation_table}{Full dredge model table of immature observation
probability.}
\item{juv_size_table}{Full dredge model table of immature size.}
\item{juv_reproduction_table}{Full dredge model table of immature
reproduction probability.}
\item{criterion}{Character variable denoting the criterion used to determine
the best-fit model.}
\item{qc}{Data frame with three variables: 1) Name of vital rate, 2) number
of individuals used to model that vital rate, and 3) number of individual
transitions used to model that vital rate.}
}
\description{
Function \code{modelsearch()} returns both a best-fit model for each vital
rate, and a model table showing all models tested. The final output can be
used as input in other functions within this package.
}
\section{Notes}{

The mechanics governing model building are fairly robust to errors and
exceptions. The function attempts to build global models, and simplifies
models automatically should model building fail. Model building proceeds
through the functions \code{\link[stats]{lm}()} (GLM with Gaussian response),
\code{\link[stats]{glm}()} (GLM with Poisson or binomial response),
\code{\link[MASS]{glm.nb}()} (GLM with negative binomial response),
\code{\link[pscl]{zeroinfl}()} (zero-inflated Poisson or negative binomial
response), \code{\link[lme4]{lmer}()} (mixed model with Gaussian response),
\code{\link[lme4]{glmer}()} (mixed model with binomial or Poisson response),
and \code{\link[glmmTMB]{glmmTMB}()} (mixed model with negative binomial,
or zero-truncated or zero-inflated Poisson or negative binomial response).
See documentation related to these functions for further information. Any
response term that is invariable in the dataset will lead to a best-fit model
for that response represented by a single constant value.

Exhaustive model building and selection proceeds via the
\code{\link[MuMIn]{dredge}()} function in package \code{MuMIn}. This function
is verbose, so that any errors and warnings developed during model building,
model analysis, and model selection can be found and dealt with.
Interpretations of errors during global model analysis may be found in
documentation in for the functions and packages mentioned. Package
\code{MuMIn} is used for model dredging (see \link[MuMIn]{dredge}()), and
errors and warnings during dredging can be interpreted using the
documentation for that package. Errors occurring during dredging lead to the
adoption of the global model as the best-fit, and the user should view all
logged errors and warnings to determine the best way to proceed. The 
\code{quiet = TRUE} option can be used to silence dredge warnings, but users
should note that automated model selection can be viewed as a black box, and
so care should be taken to ensure that the models run make biological sense,
and that model quality is prioritized.

Exhaustive model selection through dredging works best with larger datasets
and fewer tested parameters. Setting \code{suite = "full"} may initiate a
dredge that takes a dramatically long time, particularly if the model is
historical, individual covariates are used, or a zero-inflated distribution
is assumed. In such cases, the number of models built and tested will run at
least in the millions. Small datasets will also increase the error associated
with these tests, leading to adoption of simpler models overall. We do not
yet offer a parallelization option for function \code{modelsearch()}, but
plan to offer one in the future to speed this process up for particularly
large global models.

Care must be taken to build models that test the impacts of state in occasion
\emph{t}-1 for historical models, and that do not test these impacts for
ahistorical models. Ahistorical matrix modeling particularly will yield
biased transition estimates if historical terms from models are ignored. This
can be dealt with at the start of modeling by setting 
\code{historical = FALSE} for the ahistorical case, and 
\code{historical = TRUE} for the historical case.

This function handles generalized linear models (GLMs) under zero-inflated
distributions using the \code{\link[pscl]{zeroinfl}()} function, and zero-
truncated distributions using the \code{\link[VGAM]{vglm}()} function. Model
dredging may fail with these functions, leading to the global model being
accepted as the best-fit model. However, model dredges of mixed models work
for all distributions. We encourage the use of mixed models in all cases.

The negative binomial and truncated negative binomial distributions use the
quadratic structure emphasized in Hardin and Hilbe (2018, 4th Edition of
Generalized Linear Models and Extensions). The truncated negative binomial
distribution may fail to predict size probabilities correctly when dispersion
is near that expected of the Poisson distribution. To prevent this problem,
we have integrated a cap on the overdispersion parameter. However, when using
this distribution, please check the matrix column sums to make sure that they
do not predict survival greater than 1.0. If they do, then please use either
the negative binomial distribution or the zero-truncated Poisson
distribution.
}

\examples{
\donttest{
# Lathyrus example
data(lathyrus)

sizevector <- c(0, 4.6, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8,
  9)
stagevector <- c("Sd", "Sdl", "Dorm", "Sz1nr", "Sz2nr", "Sz3nr", "Sz4nr",
  "Sz5nr", "Sz6nr", "Sz7nr", "Sz8nr", "Sz9nr", "Sz1r", "Sz2r", "Sz3r", 
  "Sz4r", "Sz5r", "Sz6r", "Sz7r", "Sz8r", "Sz9r")
repvector <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1)
obsvector <- c(0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
matvector <- c(0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
immvector <- c(1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
propvector <- c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
  0)
indataset <- c(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
binvec <- c(0, 4.6, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 
  0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5)

lathframeln <- sf_create(sizes = sizevector, stagenames = stagevector, 
  repstatus = repvector, obsstatus = obsvector, matstatus = matvector, 
  immstatus = immvector, indataset = indataset, binhalfwidth = binvec, 
  propstatus = propvector)

lathvertln <- verticalize3(lathyrus, noyears = 4, firstyear = 1988,
  patchidcol = "SUBPLOT", individcol = "GENET", blocksize = 9, 
  juvcol = "Seedling1988", sizeacol = "lnVol88", repstracol = "Intactseed88",
  fecacol = "Intactseed88", deadacol = "Dead1988", 
  nonobsacol = "Dormant1988", stageassign = lathframeln, stagesize = "sizea",
  censorcol = "Missing1988", censorkeep = NA, NAas0 = TRUE, censor = TRUE)

lathvertln$feca2 <- round(lathvertln$feca2)
lathvertln$feca1 <- round(lathvertln$feca1)
lathvertln$feca3 <- round(lathvertln$feca3)

lathmodelsln3 <- modelsearch(lathvertln, historical = TRUE, 
  approach = "glm", suite = "main", 
  vitalrates = c("surv", "obs", "size", "repst", "fec"), juvestimate = "Sdl",
  bestfit = "AICc&k", sizedist = "gaussian", fecdist = "poisson", 
  indiv = "individ", patch = "patchid", year = "year2",year.as.random = TRUE,
  patch.as.random = TRUE, show.model.tables = TRUE, quiet = TRUE)

# Here we use supplemental() to provide overwrite and reproductive info
lathsupp3 <- supplemental(stage3 = c("Sd", "Sd", "Sdl", "Sdl", "mat", "Sd", "Sdl"), 
  stage2 = c("Sd", "Sd", "Sd", "Sd", "Sdl", "rep", "rep"),
  stage1 = c("Sd", "rep", "Sd", "rep", "Sd", "mat", "mat"),
  eststage3 = c(NA, NA, NA, NA, "mat", NA, NA),
  eststage2 = c(NA, NA, NA, NA, "Sdl", NA, NA),
  eststage1 = c(NA, NA, NA, NA, "Sdl", NA, NA),
  givenrate = c(0.345, 0.345, 0.054, 0.054, NA, NA, NA),
  multiplier = c(NA, NA, NA, NA, NA, 0.345, 0.054),
  type = c(1, 1, 1, 1, 1, 3, 3), type_t12 = c(1, 2, 1, 2, 1, 1, 1),
  stageframe = lathframeln, historical = TRUE)

lathmat3ln <- flefko3(year = "all", patch = "all", stageframe = lathframeln, 
  modelsuite = lathmodelsln3, data = lathvertln, supplement = lathsupp3, 
  patchcol = "patchid", yearcol = "year2", year.as.random = FALSE,
  patch.as.random = FALSE, reduce = FALSE)

summary(lathmat3ln)
}

}
