#' Bayesian estimation of mixture distributions
#' 
#' Gibbs samplers for sparse finite mixture Markov chain Monte Carlo (SFM MCMC) estimation.
#' 
#' @param data Vector of observations.
#' @param K Maximum number of mixture components.
#' @param dist String indicating the distribution of the mixture components;
#' Currently supports "normal", "skew_normal", "poisson" and "shifted_poisson".
#' @param priors List of priors; default is an empty list which implies the following priors:\cr
#' a0 = 1,\cr A0 = 200,\cr b0 = median(y),\cr B0 = (max(y) - min(y))^2 (normal),\cr
#' D_xi = 1,\cr D_psi =1, (skew normal: B0 = diag(D_xi,D_psi)), \cr c0 = 2.5,\cr
#' l0 = 1.1 (poisson),\cr l0 = 5 (shifted poisson),\cr L0 = 1.1/median(y),\cr L0 = l0 - 1 (shifted poisson),\cr
#' g0 = 0.5,\cr G0 = 100*g0/c0/B0 (normal),\cr 
#' G0 = g0/(0.5*var(y)) (skew normal).
#' @param nb_iter Number of MCMC iterations; default is 2000.
#' @param burnin Number of MCMC iterations used as burnin; default is nb_iter/2.
#' @param printing Logical; should the MCMC progression be shown? Default is TRUE.
#' 
#' @return A list of class \code{BayesMixture} containing
#' \itemize{
#'  \item{data}{ - Same as argument.}
#'  \item{dist_type}{ - Type of the distribution (continuous or discrete).}
#'  \item{pars_names}{ - Names of the mixture components' parameters.}
#'  \item{mcmc}{ - Matrix of MCMC draws where the rows corresponding to burnin have been discarded.}
#'  \item{mcmc_all}{ - Original matrix of MCMC draws.}
#' }
#' 
#' @details
#' 
#' Let \eqn{y_i}, \eqn{i=1,\dots,n} denote observations.
#' A general mixture of \eqn{K} distributions from the same 
#' parametric family is given by:
#' \deqn{y_i \sim \sum_{k=1}^{K}\pi_k p(\cdot|\theta_k)}
#' with \eqn{\sum_{k=1}^{K}\pi_k=1} and \eqn{\pi_k\geq 0}, \eqn{k=1, ...,K}.
#' \cr\cr
#' The exact number of components does not have to be known a priori
#' when using the SFM MCMC approach. Rather, an upper bound is specified for the
#' number of components and the weights of superfluous components are shrunk
#' towards zero during the estimation. Following Malsiner-Walli et al. (2016)
#' a symmetric Dirichlet prior is used for the mixture weights:
#' \deqn{\pi_k \sim \text{Dirichlet}(e_0,\dots,e_0)}
#' where a Gamma hyperprior is used on the concentration parameter \eqn{e_0}:\cr\cr
#' \deqn{e_0 \sim \text{Gamma}\left(a_0, A_0\right)}
#' 
#' 
#' \strong{Mixture of Normal distributions}
#' 
#' Normal components take the form:
#' \deqn{p(y_i|\mu_k,\sigma_k) = \frac{1}{\sqrt{2 \pi} \
#'   \sigma_k} \exp\left( - \, \frac{1}{2}            \left(  \frac{y_i -
#'       \mu_k}{\sigma_k} \right)^2     \right)}
#' 
#' Independent conjugate priors are used for \eqn{\mu_k} and \eqn{\sigma^2_k}
#' (see for instance Malsiner-Walli et al. 2016) :
#' \deqn{\mu_k \sim \text{Normal}( \text{b}_0, \text{B}_0),}
#' \deqn{\sigma^{-2}_k \sim \text{Gamma}( \text{c}_0, \text{C}_0),}
#' \deqn{C_0 \sim \text{Gamma}( \text{g}_0, \text{G}_0).}
#' 
#' 
#' \strong{Mixture of skew-Normal distributions}
#' 
#' We use the skew-Normal of Azzalini (1985) which takes the form:
#' \deqn{p(y_i| \xi_k,\omega_k,\alpha_k) = \frac{1}{\omega_k\sqrt{2\pi}} \ \exp\left( - \,
#' \frac{1}{2}            \left(  \frac{y_i - \xi_k}{\omega_k} \right)^2\right) \
#' \left(1 + \text{erf}\left( \alpha_k\left(\frac{y_i - \xi_k}{\omega_k\sqrt{2}}\right)\right)\right)}
#' where \eqn{\xi_k} is a location parameter, \eqn{\omega_k} a scale parameter and \eqn{\alpha_k}
#' the shape parameter introducing skewness. For Bayesian estimation, we adopt the approach of
#' Fruhwirth-Schnatter and Pyne (2010) and use the following reparameterised random-effect model:
#' \deqn{z_i \sim TN_{[0,\infty)}(0, 1)}
#' \deqn{y_i|(S_i = k) = \xi_k + \psi_k z_i + \epsilon_i, \quad \epsilon_i \sim N(0, \sigma^2_k)}
#' where the parameters of the skew-Normal are recovered with
#' \deqn{\alpha_k = \frac{\psi_k}{\sigma_k}, \qquad \omega^2_k = \sigma^2_k + \psi^2_k}
#' By defining a regressor \eqn{x_i = (1, z_i)'}, the skew-Normal mixture can be seen as
#' a random effect model and sampled using standard techniques. Thus we use priors similar to
#' the Normal mixture model:
#' \deqn{(\xi_k, \psi_k)' \sim \text{Normal}(\text{b}_0, \text{B}_0),}
#' \deqn{\sigma^{-2}_k \sim \text{Gamma}(\text{c}_0, \text{C}_0),}
#' \deqn{\text{C}_0 \sim \text{Gamma}( \text{g}_0, \text{G}_0).}
#' We set \deqn{\text{b}_0 = (\text{median}(y), 0)'} and \deqn{\text{B}_0 = \text{diag}(\text{D}\_\text{xi}, \text{D}\_\text{psi})} with D_xi = D_psi = 1.
#' 
#' 
#' \strong{Mixture of Poisson distributions}
#' 
#' Poisson components take the form:
#' \deqn{p(y_i|\lambda_k) = \frac{1}{y_i!} \, \lambda^{y_i}_k \,\exp(-\lambda_k).}
#' The prior for \eqn{\lambda_k} follows from Viallefont et al. (2002):
#' \deqn{\lambda_k \sim \text{Gamma}(\text{l}_0,\text{L}_0).}
#' 
#' 
#' \strong{Mixture of shifted-Poisson distributions}
#' 
#' Shifted-Poisson components take the form
#' \deqn{p(y_i |\lambda_k, \kappa_k) = \frac{1}{(y_i - \kappa_k)!} \,
#' \lambda^{(y_i - \kappa_k)}_k \,\exp(-\lambda_k)}
#' where \eqn{\kappa_k} is a location or shift parameter with uniform prior.
#' 
#' 
#' @references
#' 
#' \insertRef{malsiner-walli_model-based_2016}{BayesMultiMode}\cr\cr
#' \insertRef{fruhwirth-schnatter_bayesian_2010}{BayesMultiMode}\cr\cr
#' \insertRef{SFS:Mal:2019}{BayesMultiMode}\cr\cr
#' \insertRef{azzalini_1985}{BayesMultiMode}\cr\cr
#' \insertRef{viallefont2002bayesian}{BayesMultiMode}
#' 
#' @importFrom assertthat assert_that
#' @importFrom assertthat is.scalar
#' 
#' @examples
#' # Example with galaxy data ================================================
#' set.seed(123) 
#' 
#' # retrieve galaxy data
#' y = galaxy
#'
#' # estimation
#' bayesmix = bayes_estimation(data = y,
#'                            K = 5, #not many to run the example rapidly
#'                            dist = "normal",
#'                            nb_iter = 500, #not many to run the example rapidly
#'                            burnin = 100)
#'                            
#' # plot estimated mixture
#' # plot(bayesmix, max_size = 200)
#' 
#' # Changing priors ================================================
#' set.seed(123) 
#' 
#' # retrieve galaxy data
#' y = galaxy
#'
#' # estimation
#' K = 5
#' bayesmix = bayes_estimation(data = y,
#'                            K = K, #not many to run the example rapidly
#'                            dist = "normal",
#'                            priors = list(a0 = 10,
#'                                          A0 = 10*K),
#'                            nb_iter = 500, #not many to run the example rapidly
#'                            burnin = 100)
#'                            
#' # plot estimated mixture
#' # plot(bayesmix, max_size = 200)
#' 
#' # Example with DNA data =====================================================
#' \donttest{
#' set.seed(123) 
#' 
#' # retrieve DNA data
#' y = d4z4
#'
#' # estimation
#' bayesmix = bayes_estimation(data = y,
#'                            K = 5, #not many to run the example rapidly
#'                            dist = "shifted_poisson",
#'                            nb_iter = 500, #not many to run the example rapidly
#'                            burnin = 100)
#'                            
#' # plot estimated mixture
#' # plot(bayesmix, max_size = 200)
#' }
#' 
#' @export
bayes_estimation <- function(data,
                             K,
                             dist,
                             priors = list(),
                             nb_iter = 2000,
                             burnin = nb_iter/2,
                             printing = TRUE) {

  # ---- Input validation ------------------------------------------------------
  # Conditions are chained with the scalar, short-circuiting `&&` so that a
  # failed earlier check (e.g. wrong length) stops evaluation of the later
  # ones; the assertion therefore stays a single logical value and the custom
  # `msg` is what gets reported.
  assert_that(is.vector(data) && length(data) > 0,
              msg = "data should be a vector of length > 0")
  assert_that(!any(is.na(data)) && !any(is.infinite(data)),
              msg = "data should not include missing or infinite values")
  # Type and length are checked before membership so that a non-character or
  # multi-element `dist` fails with the message below.
  assert_that(is.character(dist) && is.scalar(dist) &&
                dist %in% c("normal", "skew_normal", "poisson", "shifted_poisson"),
              msg = paste0("Unsupported distribution. dist should be either ",
                           "normal, skew_normal, poisson or shifted_poisson"))
  assert_that(is.scalar(nb_iter) && nb_iter > 0,
              msg = "nb_iter should be a positive integer")
  assert_that(is.scalar(burnin) && burnin > 0 && burnin < nb_iter,
              msg = "burnin should be a positive integer lower than nb_iter")
  assert_that(is.scalar(K) && K > 0,
              msg = "K should be a positive integer")
  assert_that(is.logical(printing),
              msg = "printing should be either TRUE or FALSE")

  # Rounding parameters that should be integers.
  K <- round(K)
  nb_iter <- round(nb_iter)
  burnin <- round(burnin)

  # ---- Sampler dispatch ------------------------------------------------------
  # Each branch passes only the prior entries listed in `priors_labels` to the
  # matching Gibbs sampler. Labels absent from `priors` produce NULL entries in
  # the subset; presumably the sampler substitutes its default for those
  # (the samplers are defined elsewhere, so this is not verified here).
  if (dist == "normal") {
    priors_labels <- c("a0", "A0", "e0", "b0", "B0", "c0", "g0", "G0")

    mcmc <- gibbs_SFM_normal(y = data,
                             K = K,
                             nb_iter = nb_iter,
                             priors = priors[priors_labels],
                             printing = printing)
    pars_names <- c("eta", "mu", "sigma")
    dist_type <- "continuous"

  } else if (dist == "skew_normal") {
    priors_labels <- c("a0", "A0", "e0", "b0", "c0", "C0", "g0", "G0", "D_xi", "D_psi")

    mcmc <- gibbs_SFM_skew_n(y = data,
                             K = K,
                             nb_iter = nb_iter,
                             priors = priors[priors_labels],
                             printing = printing)
    pars_names <- c("eta", "xi", "omega", "alpha")
    dist_type <- "continuous"

  } else if (dist == "poisson") {
    priors_labels <- c("a0", "A0", "e0", "l0", "L0")

    mcmc <- gibbs_SFM_poisson(y = data,
                              K = K,
                              nb_iter = nb_iter,
                              priors = priors[priors_labels],
                              printing = printing)
    pars_names <- c("eta", "lambda")
    dist_type <- "discrete"

  } else if (dist == "shifted_poisson") {
    priors_labels <- c("a0", "A0", "e0", "l0", "L0")

    mcmc <- gibbs_SFM_sp(y = data,
                         K = K,
                         nb_iter = nb_iter,
                         priors = priors[priors_labels],
                         printing = printing)
    pars_names <- c("eta", "kappa", "lambda")
    dist_type <- "discrete"

  } else {
    # Defensive fallback: the validation above already rejects any other value.
    stop("mixture distribution not supported")
  }

  # Wrap the draws and their metadata in a BayesMixture object; `burnin` is
  # forwarded so the constructor can separate burn-in draws from retained ones.
  new_BayesMixture(mcmc = mcmc,
                   data = data, K = K,
                   burnin = burnin, dist = dist,
                   dist_type = dist_type,
                   pars_names = pars_names)
}