% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/f_hyperEstimation.R
\name{exploreHypers}
\alias{exploreHypers}
\title{Explore various hyperparameter estimates}
\usage{
exploreHypers(data, theta_init, squashed = TRUE, zeroes = FALSE,
  N_star = 1, method = c("nlminb", "nlm", "bfgs"), param_limit = 100,
  max_pts = 20000, std_errors = FALSE)
}
\arguments{
\item{data}{A data frame from \code{\link{processRaw}} containing columns
named \emph{N}, \emph{E}, and (if squashed) \emph{weight}.}

\item{theta_init}{A data frame of initial hyperparameter guesses with
columns ordered as:
\eqn{\alpha_1, \beta_1, \alpha_2, \beta_2, P}.}

\item{squashed}{A scalar logical (\code{TRUE} or \code{FALSE}) indicating
whether or not data squashing was used.}

\item{zeroes}{A scalar logical specifying if zero counts are included.}

\item{N_star}{A positive scalar whole number value for the minimum count
size to be used for hyperparameter estimation. If zeroes are used, set
\code{N_star} to \code{NULL}.}

\item{method}{A scalar string indicating which optimization procedure is to
be used. Choices are \code{"nlminb"}, \code{"nlm"}, or \code{"bfgs"}.}

\item{param_limit}{A scalar numeric value for the largest acceptable value
for the \eqn{\alpha} and \eqn{\beta} estimates. Used to help protect
against unreasonable/erroneous estimates.}

\item{max_pts}{A scalar whole number for the largest number of data points
allowed. Used to help prevent extremely long run times.}

\item{std_errors}{A scalar logical indicating if standard errors should be
returned for the hyperparameter estimates.}
}
\value{
A list including the data frame \code{estimates} of hyperparameter
  estimates corresponding to the initial guesses from \code{theta_init} (plus
  convergence results):
  \itemize{
    \item{\emph{code}: }{The convergence code returned by the chosen
      optimization function (see \code{\link[stats]{nlminb}},
      \code{\link[stats]{nlm}}, and \code{\link[stats]{optim}} for details).}
    \item{\emph{converge}: }{A logical indicating whether or not convergence
      was reached. See "Details" section for more information.}
    \item{\emph{in_bounds}: }{A logical indicating whether or not the
      estimates were within the bounds of the parameter space (upper bound
      for \eqn{\alpha_1}, \eqn{\beta_1}, \eqn{\alpha_2}, and \eqn{\beta_2} was determined by
      the \code{param_limit} argument).}
    \item{\emph{minimum}: }{The negative log-likelihood value corresponding
      to the estimated optimal value of the hyperparameter.}
  }
  Also returns the data frame \code{std_errs} if standard errors are
  requested.
}
\description{
\code{exploreHypers} finds hyperparameter estimates using a variety of
starting points to examine the consistency of the optimization procedure.
}
\details{
The \code{method} argument determines which optimization procedure
  is used. All the options use functions from the
  \code{\link[stats:stats-package]{stats}} package:
  \itemize{
    \item \code{"nlminb"}: \code{\link[stats]{nlminb}}
    \item \code{"nlm"}: \code{\link[stats]{nlm}}
    \item \code{"bfgs"}: \code{\link[stats]{optim}} (\code{method = "BFGS"})
  }

Since this function runs multiple optimization procedures, it is
  best to start with 5 or fewer starting points (rows in
  \code{theta_init}). If the function runs in a reasonable amount of time,
  this number can be increased.

This function should not be used with very large data sets unless
  data squashing is used first since each optimization call will take a long
  time.

It is recommended to use \code{N_star = 1} when practical. Data
  squashing (see \code{\link{squashData}}) can be used to reduce the number
  of data points.

The \emph{converge} column in the resulting data frame was
  determined by examining the convergence \emph{code} of the chosen
  optimization method. In some instances, the code is somewhat ambiguous. The
  determination of \emph{converge} was intended to be conservative (leaning
  towards \code{FALSE} when questionable). See the documentation for the chosen
  method for details about \emph{code}.

Standard errors, if requested, are calculated using the observed
  Fisher information matrix as discussed in DuMouchel (1999).
}
\section{Warning}{
 Make sure to properly specify the \code{squashed},
  \code{zeroes}, and \code{N_star} arguments for your data set, since these
  will determine the appropriate likelihood function. Also, this function
  will not filter out data points. For instance, if you use \code{N_star = 2}
  you must filter out the ones and zeroes (if present) from \code{data} prior
  to using this function.
}

\examples{
#Start with 2 or more guesses
theta_init <- data.frame(
  alpha1 = c(0.2, 0.1),
  beta1  = c(0.1, 0.1),
  alpha2 = c(2,   10),
  beta2  = c(4,   10),
  p      = c(1/3, 0.2)
)
data(caers)
proc <- processRaw(caers)
squashed <- squashData(proc, bin_size = 100, keep_pts = 100)
squashed <- squashData(squashed, count = 2, bin_size = 10, keep_pts = 20)
suppressWarnings(
  exploreHypers(squashed, theta_init = theta_init)
)

}
\references{
DuMouchel W (1999). "Bayesian Data Mining in Large Frequency
  Tables, With an Application to the FDA Spontaneous Reporting System."
  \emph{The American Statistician}, 53(3), 177-190.
}
\seealso{
\code{\link[stats]{nlminb}}, \code{\link[stats]{nlm}}, and
  \code{\link[stats]{optim}} for optimization details

\code{\link{squashData}} for data preparation

Other hyperparameter estimation functions: \code{\link{autoHyper}},
  \code{\link{hyperEM}}
}
\concept{hyperparameter estimation functions}
\keyword{openEBGM}
