% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cmgnd.R
\name{cmgnd}
\alias{cmgnd}
\title{cmgnd: Function for Clustering using Constrained Mixtures of Generalized Normal Distributions}
\usage{
cmgnd(
  x,
  K = 2,
  Cmu = rep(0, K),
  Csigma = rep(0, K),
  Cnu = rep(0, K),
  nstart = 50,
  theta = FALSE,
  nustart = rep(2, K),
  nustartype = "random",
  gauss = FALSE,
  laplace = FALSE,
  scale = FALSE,
  eps = 10^-4,
  maxit = 999,
  verbose = TRUE,
  sigbound = c(0.1, 5),
  sr = "like",
  eta = 0.5,
  seed = 12345,
  seed.nstart = seq(1:nstart)
)
}
\arguments{
\item{x}{A numeric vector of observations.}

\item{K}{An integer specifying the number of mixture components to fit.
Default is 2.}

\item{Cmu}{A binary vector indicating mixture components
for the location parameter. The k-th element is set to 1 if the
k-th mixture component belongs to the Cr partition, and 0 otherwise.
Default is \code{c(0,0)}, indicating no mixture partition with \code{K=2}.}

\item{Csigma}{A binary vector indicating mixture components
for the scale parameter. The k-th element is set to 1 if the k-th
mixture component belongs to the Cr partition, and 0 otherwise.
Default is \code{c(0,0)}, indicating no mixture partition with \code{K=2}.}

\item{Cnu}{A binary vector indicating mixture components
for the shape parameter. The k-th element is set to 1 if the k-th mixture component belongs to the Cr partition, and 0 otherwise.
Default is \code{c(0,0)}, indicating no mixture partition with \code{K=2}.}

\item{nstart}{An integer specifying the number of starting
points for the shape parameter. Default is 50.}

\item{theta}{A parameter matrix used to initialize the estimation
for the first starting point. Default is \code{FALSE}.}

\item{nustart}{A numeric vector containing the starting values for the shape parameter \code{nu}.
Default is \code{c(2,2)} for \code{K=2}.}

\item{nustartype}{A character string indicating whether the initialization of \code{nu} should be \code{"random"},
around the values in \code{nustart}, or \code{"exact"}, using the exact values in \code{nustart}.}

\item{gauss}{A logical value indicating if the algorithm should use the Gaussian distribution.
Default is \code{FALSE}.}

\item{laplace}{A logical value indicating if the algorithm should use the Laplace distribution.
Default is \code{FALSE}.}

\item{scale}{A logical value indicating whether the function should scale the data. Default is \code{FALSE}.}

\item{eps}{A numeric value specifying the tolerance level of the ECM algorithm.}

\item{maxit}{An integer specifying the maximum number of iterations.}

\item{verbose}{A logical value indicating whether to display running output. Default is \code{TRUE}.}

\item{sigbound}{A numeric vector of length two specifying the lower and upper bounds for resetting the sigma estimates.
Default value is \code{c(0.1, 5)}.}

\item{sr}{A character string specifying the type of convergence criterion to use.
The default is \code{"like"}, for likelihood-based convergence; alternatively, \code{"parameter"} can be used for a convergence criterion based on the parameter estimates.}

\item{eta}{A numeric value specifying the tolerance level for the likelihood-based convergence.
Default value is \code{.5}.}

\item{seed}{Optional integer used to set the random seed via \code{set.seed()}. The default is \code{12345}. If \code{NULL}, no seed is set and results may vary between runs.}

\item{seed.nstart}{Optional numeric vector used to set the random seed via \code{set.seed()} during the initialisation of the \code{nstart} random posterior probabilities. The default is \code{seq(1:nstart)}, i.e. the integers from 1 to \code{nstart}. If \code{NULL}, no seed is set and results may vary between runs.}
}
\value{
\item{\code{ll}}{The log-likelihood corresponding to the estimated model.}
\item{\code{nobs}}{Number of observations.}
\item{\code{parameters}}{Data frame of the estimated parameters.}
\item{\code{ic}}{Data frame of information criteria. AIC, BIC, HQIC and EDC are returned.}
\item{\code{res}}{Matrix of posterior probabilities or responsibilities.}
\item{\code{clus}}{Vector of group classifications.}
\item{\code{op_it}}{List containing three integers: \code{permstart} the
optimal starting value of the permutation of k-means solutions; \code{startnu} the
optimal starting value of the shape parameter; \code{iter} number of iterations.}
\item{\code{cputime}}{A numeric value indicating the CPU time employed.}
\item{\code{info}}{List containing a few of the original user inputs,
for use by other dedicated functions of the \code{cmgnd} class.}
}
\description{
Fits univariate constrained mixture of generalized normal distribution models
by imposing mixture partitions. Models are estimated by the ECM algorithm initialized by k-means.
}
\details{
The constrained mixture of generalized normal distributions (CMGND) model is an advanced statistical tool designed for
analyzing univariate data characterized by non-normal features such as asymmetry, multi-modality,
leptokurtosis, and heavy tails. This model extends the mixture of generalized normal
distributions (MGND) by incorporating constraints on the parameters, thereby reducing
the number of parameters to be estimated and improving model performance.
The CMGND model is defined by the following components:
\deqn{f(x|\theta) = \sum_{k=1}^{K} \pi_k f_k(x|\mu_k, \sigma_k, \nu_k)}
where:
\eqn{\pi_k} are the mixture weights, satisfying \eqn{0 < \pi_k < 1} and \eqn{\sum_{k=1}^{K} \pi_k = 1}.
\eqn{f_k(x|\mu_k, \sigma_k, \nu_k)} is the Generalized Normal Distribution for the k-th component with mean \eqn{\mu_k},
scale \eqn{\sigma_k}, and shape parameter \eqn{\nu_k}.

The parameter space can be constrained by imposing equality constraints
such as \eqn{\mu_k = \mu_r}, \eqn{\sigma_k = \sigma_r}, and/or \eqn{\nu_k = \nu_r}
for all \eqn{k \in C_r}, where \eqn{C_r} is a partition of the set \eqn{\{1, 2, \ldots, K\}}.

The \eqn{k \in C_r} partition for each parameter can be specified
by the binary vectors \code{Cmu}, \code{Csigma} and \code{Cnu}.
}
\examples{
# Old Faithful dataset
x=faithful$eruptions
# Unconstrained model estimation
Cmu <- c(0, 0)
Csigma <- c(0, 0)
Cnu <- c(0, 0)
model_unc <- cmgnd(x, nstart = 2, K = 2, Cmu, Csigma, Cnu)
model_unc$parameters
plot_cmgnd(x, model_unc)
# Constrained model estimation with common scale parameters
Csigma <- c(1, 1)
model_con <- cmgnd(x, nstart = 2, K =2, Cmu, Csigma, Cnu)
model_con$parameters
plot_cmgnd(x, model_con)
}
\references{
Bazi, Y., Bruzzone, L., and Melgani, F. (2006). Image thresholding
based on the EM algorithm and the generalized Gaussian distribution.
Pattern Recognition, 40(2), pp 619–634.

Wen, L., Qiu, Y., Wang, M., Yin, J., and Chen, P. (2022). Numerical characteristics and
parameter estimation of finite mixed generalized normal distribution. Communications in
Statistics - Simulation and Computation, 51(7), pp 3596–3620.

Duttilo, P. (2024). Modelling financial returns with mixtures of generalized normal distributions.
PhD Thesis, University “G. d’Annunzio” of Chieti-Pescara, pp. 1-166,
\doi{10.48550/arXiv.2411.11847}

Duttilo, P. and Gattone, S.A. (2025). Enhancing parameter estimation in finite
mixture of generalized normal distributions, Computational Statistics, pp. 1-28,
\doi{10.1007/s00180-025-01638-x}

Duttilo, P., Gattone, S.A., and Kume, A. (2025). Constrained mixtures of generalized normal distributions,
pp. 1-34, \doi{10.48550/arXiv.2506.03285}
}
