# ============================================================================
# Main logitr functions
# ============================================================================

#' The main function for estimating logit models
#'
#' Use this function to estimate multinomial (MNL) and mixed logit (MXL)
#' models with "Preference" space or "Willingness-to-pay" (WTP) space utility
#' parameterizations. The function includes an option to run a multistart
#' optimization loop with random starting points in each iteration, which is
#' useful for non-convex problems like MXL models or models with WTP space
#' utility parameterizations. The main optimization loop uses the `nloptr()`
#' function to minimize the negative log-likelihood function.
#' @keywords logitr mnl mxl wtp willingness-to-pay mixed logit
#'
#' @param data The choice data, formatted as a `data.frame` object.
#' @param choice The name of the column that identifies the choice variable.
#' @param obsID The name of the column that identifies each choice
#' observation.
#' @param pars The names of the parameters to be estimated in the model.
#' Must be the same as the column names in the `data` argument. For WTP space
#' models, do not include price in `pars`.
#' @param price The name of the column that identifies the price variable.
#' Required for WTP space models. Defaults to `NULL`.
#' @param randPars A named vector whose names are the random parameters and
#' values the distribution: `'n'` for normal or `'ln'` for log-normal.
#' Defaults to `NULL`.
#' @param randPrice The random distribution for the price parameter: `'n'` for
#' normal or `'ln'` for log-normal. Only used for WTP space MXL models.
#' Defaults to `NULL`.
#' @param modelSpace Set to `'wtp'` for WTP space models. Defaults to `"pref"`.
#' @param weights The name of the column that identifies the weights to be
#' used in model estimation. Defaults to `NULL`.
#' @param panelID The name of the column that identifies the individual (for
#' panel data where multiple observations are recorded for each individual).
#' Defaults to `NULL`.
#' @param clusterID The name of the column that identifies the cluster
#' groups to be used in model estimation. Defaults to `NULL`.
#' @param robust Determines whether or not a robust covariance matrix is
#' estimated. Defaults to `FALSE`. Specification of a `clusterID` or
#' `weights` will override the user setting and set this to `TRUE' (a
#' warning will be displayed in this case). Replicates the functionality of
#' Stata's cmcmmixlogit.
#' @param numMultiStarts is the number of times to run the optimization loop,
#' each time starting from a different random starting point for each parameter
#' between `startParBounds`. Recommended for non-convex models, such as WTP
#' space models and mixed logit models. Defaults to `1`.
#' @param startParBounds sets the `lower` and `upper` bounds for the starting
#' parameters for each optimization run, which are generated by
#' `runif(n, lower, upper)`. Defaults to `c(-1, 1)`.
#' @param startVals is vector of values to be used as starting values for the
#' optimization. Only used for the first run if `numMultiStarts > 1`. Defaults
#' to `NULL`.
#' @param useAnalyticGrad Set to `FALSE` to use numerically approximated
#' gradients instead of analytic gradients during estimation. For now, using
#' the analytic gradient is faster for MNL models but slower for MXL models.
#' Defaults to `TRUE`.
#' @param scaleInputs By default each variable in `data` is scaled to be
#' between 0 and 1 before running the optimization routine because it usually
#' helps with stability, especially if some of the variables have very large or
#' very small values (e.g. `> 10^3` or `< 10^-3`). Set to `FALSE` to turn this
#' feature off. Defaults to `TRUE`.
#' @param standardDraws By default, a new set of standard normal draws are
#' generated during each call to `logitr` (the same draws are used during each
#' multistart iteration). The user can override those draws by providing a
#' matrix of standard normal draws if desired. Defaults to `NULL`.
#' @param numDraws The number of Halton draws to use for MXL models for the
#' maximum simulated likelihood. Defaults to `50`.
#' @param options A list of options for controlling the `nloptr()` optimization.
#' Run `nloptr::nloptr.print.options()` for details.
#' @param choiceName No longer used as of v0.2.3 - if provided, this is passed
#' to the `choice` argument and a warning is displayed.
#' @param obsIDName No longer used as of v0.2.3 - if provided, this is passed
#' to the `obsID` argument and a warning is displayed.
#' @param parNames No longer used as of v0.2.3 - if provided, this is passed
#' to the `pars` argument and a warning is displayed.
#' @param priceName No longer used as of v0.2.3 - if provided, this is passed
#' to the `price` argument and a warning is displayed.
#' @param weightsName No longer used as of v0.2.3 - if provided, this is passed
#' to the `weights` argument and a warning is displayed.
#' @param clusterName No longer used as of v0.2.3 - if provided, this is passed
#' to the `clusterID` argument and a warning is displayed.
#' @param cluster No longer used as of v0.2.3 - if provided, this is passed
#' to the `clusterID` argument and a warning is displayed.
#' @details
#' The the `options` argument is used to control the detailed behavior of the
#' optimization and must be passed as a list, e.g. `options = list(...)`.
#' Below are a list of the default options, but other options can be included.
#' Run `nloptr::nloptr.print.options()` for more details.
#'
#' |    Argument    |    Description    |    Default    |
#' |:---------------|:------------------|:--------------|
#' |`xtol_rel`|The relative `x` tolerance for the `nloptr` optimization loop.|`1.0e-6`|
#' |`xtol_abs`|The absolute `x` tolerance for the `nloptr` optimization loop.|`1.0e-6`|
#' |`ftol_rel`|The relative `f` tolerance for the `nloptr` optimization loop.|`1.0e-6`|
#' |`ftol_abs`|The absolute `f` tolerance for the `nloptr` optimization loop.|`1.0e-6`|
#' |`maxeval`|The maximum number of function evaluations for the `nloptr` optimization loop. |`1000`|
#' |`algorithm`|The optimization algorithm that `nloptr` uses.|`"NLOPT_LD_LBFGS"`|
#' |`print_level`|The print level of the `nloptr` optimization loop.|`0`|
#'
#' @return
#' The function returns a list object containing the following objects.
#'
#' |    Value    |    Description    |
#' |:------------|:------------------|
#' |`coef`|The model coefficients at convergence.|
#' |`logLik`|The log-likelihood value at convergence.|
#' |`nullLogLik`|The null log-likelihood value (if all coefficients are 0).|
#' |`gradient`|The gradient of the log-likelihood at convergence.|
#' |`hessian`|The hessian of the log-likelihood at convergence.|
#' |`startPars`|The starting values used.|
#' |`multistartNumber`|The multistart run number for this model.|
#' |`multistartSummary`|A summary of the log-likelihood values for each multistart run (if more than one multistart was used).|
#' |`time`|The user, system, and elapsed time to run the optimization.|
#' |`iterations`|The number of iterations until convergence.|
#' |`message`|A more informative message with the status of the optimization result.|
#' |`status`|An integer value with the status of the optimization (positive values are successes). Use [statusCodes()] for a detailed description.|
#' |`call`|The matched call to `logitr()`.|
#' |`inputs`|A list of the original inputs to `logitr()`.|
#' |`data`|A list of the original data provided to `logitr()` broken up into components used during model estimation.|
#' |`numObs`|The number of observations.|
#' |`numParams`|The number of model parameters.|
#' |`freq`|The frequency counts of each choice alternative.|
#' |`modelType`|The model type, `'mnl'` for multinomial logit or `'mxl'` for mixed logit.|
#' |`weightsUsed`|`TRUE` or `FALSE` for whether weights were used in the model.|
#' |`numClusters`|The number of clusters.|
#' |`parSetup`|A summary of the distributional assumptions on each model parameter (`"f"`="fixed", `"n"`="normal distribution", `"ln"`="log-normal distribution").|
#' |`parIDs`|A list identifying the indices of each parameter in `coef` by a variety of types.|
#' |`scaleFactors`|A vector of the scaling factors used to scale each coefficient during estimation.|
#' |`standardDraws`|The draws used during maximum simulated likelihood (for MXL models).|
#' |`options`|A list of options for controlling the `nloptr()` optimization. Run `nloptr::nloptr.print.options()` for details.|
#'
#' @export
#' @examples
#' # For more detailed examples, visit
#' # https://jhelvy.github.io/logitr/articles/
#'
#' library(logitr)
#'
#' # Estimate a MNL model in the Preference space
#' mnl_pref <- logitr(
#'   data   = yogurt,
#'   choice = "choice",
#'   obsID  = "obsID",
#'   pars   = c("price", "feat", "brand")
#' )
#'
#' # Estimate a MNL model in the WTP space, using a 10-run multistart
#' mnl_wtp <- logitr(
#'   data           = yogurt,
#'   choice         = "choice",
#'   obsID          = "obsID",
#'   pars           = c("feat", "brand"),
#'   price          = "price",
#'   modelSpace     = "wtp",
#'   numMultiStarts = 10
#' )
#'
#' # Estimate a MXL model in the Preference space with "feat" and "brand"
#' # following normal distributions
#' mxl_pref <- logitr(
#'   data   = yogurt,
#'   choice = "choice",
#'   obsID  = "obsID",
#'   pars   = c("price", "feat", "brand"),
#'   randPars = c(feat = "n", brand = "n")
#' )
logitr <- function(
  data,
  choice,
  obsID,
  pars,
  price           = NULL,
  randPars        = NULL,
  randPrice       = NULL,
  modelSpace      = "pref",
  weights         = NULL,
  panelID         = NULL,
  clusterID       = NULL,
  robust          = FALSE,
  numMultiStarts  = 1,
  useAnalyticGrad = TRUE,
  scaleInputs     = TRUE,
  startParBounds  = c(-1, 1),
  standardDraws   = NULL,
  numDraws        = 50,
  startVals       = NULL,
  options         = list(
    print_level = 0,
    xtol_rel    = 1.0e-6,
    xtol_abs    = 1.0e-6,
    ftol_rel    = 1.0e-6,
    ftol_abs    = 1.0e-6,
    maxeval     = 1000,
    algorithm   = "NLOPT_LD_LBFGS"
  ),
  parNames, # Outdated argument names as of v0.2.3
  choiceName,
  obsIDName,
  priceName,
  weightsName,
  clusterName,
  cluster
) {

  call <- match.call()

  # Argument names were changed in v0.2.3
  calls <- names(sapply(call, deparse))[-1]
  if (any("parNames" %in% calls)) {
    pars <- parNames
    warning("Use 'pars' instead of 'parNames'")
  }
  if (any("choiceName" %in% calls)) {
    choice <- choiceName
    warning("Use 'choice' instead of 'choiceName'")
  }
  if (any("obsIDName" %in% calls)) {
    obsID <- obsIDName
    warning("Use 'obsID' instead of 'obsIDName'")
  }
  if (any("priceName" %in% calls)) {
    price <- priceName
    warning("Use 'price' instead of 'priceName'")
  }
  if (any("weightsName" %in% calls)) {
    weights <- weightsName
    warning("Use 'weights' instead of 'weightsName'")
  }
  if (any("clusterName" %in% calls)) {
    clusterID <- clusterName
    warning("Use 'clusterID' instead of 'clusterName'")
  }
  if (any("cluster" %in% calls)) {
    clusterID <- cluster
    warning("Use 'clusterID' instead of 'cluster'")
  }

  data <- as.data.frame(data) # tibbles break things

  modelInputs <- getModelInputs(
    data, choice, obsID, pars, randPars, price, randPrice, modelSpace, weights,
    panelID, clusterID, robust, numMultiStarts, useAnalyticGrad, scaleInputs,
    startParBounds, standardDraws, numDraws, startVals, call, options
  )
  allModels <- runMultistart(modelInputs)
  if (modelInputs$inputs$numMultiStarts > 1) {
    summary <- getMultistartSummary(allModels)
    model <- getBestModel(allModels, summary)
    model$multistartSummary <- summary
  } else {
    model <- allModels[[1]]
  }
  model <- appendModelInfo(model, modelInputs)
  message("Done!")
  return(model)
}

getMultistartSummary <- function(allModels) {
  summary <- data.frame(
    getListVal(allModels, "logLik"),
    getListVal(allModels, "iterations"),
    getListVal(allModels, "status")
  )
  colnames(summary) <- c("Log Likelihood", "Iterations", "Exit Status")
  return(summary)
}

getListVal <- function(object, val) {
  return(unlist(lapply(object, function(x) x[[val]])))
}

getBestModel <- function(allModels, summary) {
  summary$index <- seq(nrow(summary))
  good <- summary[which(summary$`Exit Status` > 0),]
  index <- good[which.max(good$`Log Likelihood`),]$index
  if (length(index) == 0) {
    # All NA values...none of the models converged, so just return the first
    index <- 1
  }
  return(allModels[[index]])
}
