% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tclustIC.R
\name{tclustIC}
\alias{tclustIC}
\title{Performs cluster analysis by calling \code{\link{tclustfsda}} for different
 numbers of groups \code{k} and restriction factors \code{c}}
\usage{
tclustIC(
  x,
  kk = 1:5,
  cc = c(1, 2, 4, 8, 16, 32, 64, 128),
  alpha = 0,
  whichIC = c("ALL", "MIXMIX", "MIXCLA", "CLACLA"),
  nsamp,
  refsteps = 15,
  reftol = 1e-14,
  equalweights = FALSE,
  msg = TRUE,
  nocheck = FALSE,
  plot = FALSE,
  startv1 = 1,
  restrtype = c("eigen", "deter"),
  UnitsSameGroup,
  numpool,
  cleanpool,
  trace = FALSE,
  ...
)
}
\arguments{
\item{x}{An n x p data matrix (n observations and p variables).
 Rows of x represent observations, and columns represent variables.

 Missing values (NA's) and infinite values (Inf's) are allowed,
 since observations (rows) with missing or infinite values will
 automatically be excluded from the computations.}

\item{kk}{an integer vector specifying the number of mixture components (clusters) for which the BIC is to be calculated. By default \code{kk=1:5}.}

\item{cc}{a vector specifying the values of the restriction factor which have to be considered. By default \code{cc=c(1, 2, 4, 8, 16, 32, 64, 128)}.}

\item{alpha}{Global trimming level. A scalar between 0 and 0.5 or an integer specifying the number of
 observations which have to be trimmed. If \code{alpha=0} all observations are considered. By default \code{alpha=0}.

 In more detail, if \code{0 < alpha < 1} clustering is based on \code{h = fix(n * (1-alpha))}
 observations, else if alpha is an integer greater than 1 clustering is based on \code{h = n - floor(alpha)}.}

\item{whichIC}{A character value which specifies which information criteria must be computed
for each \code{k} (number of groups) and each value of the restriction factor \code{c}. Possible values for \code{whichIC} are:
\itemize{
 \item "MIXMIX": a mixture model is fitted and for computing the information criterion
    the mixture likelihood is used. This option corresponds to the use of the Bayesian
    Information criterion (BIC). In the output just the matrix \code{MIXMIX} is given.
\item "MIXCLA": a mixture model is fitted but to compute the information criterion
    the classification likelihood is used. This option corresponds to the use of the
    Integrated Complete Likelihood (ICL). In the output just the matrix \code{MIXCLA} is given.
\item "CLACLA": everything is based on the classification likelihood. This information
    criterion will be called CLA. In the output just the matrix \code{CLACLA} is given.
\item "ALL": both classification and mixture likelihood are used. In this case all
    three information criteria CLA, ICL and BIC are computed. In the output all
    three matrices \code{MIXMIX}, \code{MIXCLA} and \code{CLACLA} are given.
}}

\item{nsamp}{If a scalar, it contains the number of subsamples which will be extracted.
 If \code{nsamp = 0} all subsets will be extracted. Remark - if the number of all possible
 subsets is less than 300 the default is to extract all subsets, otherwise just 300.
 If \code{nsamp} is a matrix it contains in the rows the indexes of the subsets which
 have to be extracted. \code{nsamp} in this case can be conveniently generated by
 function \code{subsets()}. \code{nsamp} can have \code{k} columns or \code{k * (p + 1)}
 columns. If \code{nsamp} has \code{k} columns the \code{k} initial centroids in each
 iteration \code{i} are given by \code{X[nsamp[i,] ,]} and the covariance matrices are equal
 to the identity.

 If \code{nsamp} has \code{k * (p + 1)} columns, the initial centroids and covariance
 matrices in iteration \code{i} are computed as follows:
 \itemize{
 \item \code{X1 <- X[nsamp[i, ], ]}
 \item \code{colMeans(X1[1:(p + 1), ])} contains the initial centroid for group 1
 \item \code{cov(X1[1:(p + 1), ])} contains the initial cov matrix for group 1
 \item \code{colMeans(X1[(p + 2):(2*p + 2), ])} contains the initial centroid for group 2
 \item \code{cov(X1[(p + 2):(2*p + 2), ])} contains the initial cov matrix for group 2
 \item ...
 \item \code{colMeans(X1[((k-1)*(p+1)+1):(k*(p+1)), ])} contains the initial centroid for group k
 \item \code{cov(X1[((k-1)*(p+1)+1):(k*(p+1)), ])} contains the initial cov matrix for group k.
 }

 REMARK: If \code{nsamp} is not a scalar, the option \code{startv1} given below is ignored.
 More precisely, if \code{nsamp} has \code{k} columns, \code{startv1 = 0} is used; otherwise, if
 \code{nsamp} has \code{k*(p+1)} columns, \code{startv1 = 1} is used.}

\item{refsteps}{Number of refining iterations in each subsample. Default is \code{refsteps=15}.
\code{refsteps = 0} means "raw-subsampling" without iterations.}

\item{reftol}{Tolerance of the refining steps. The default value is 1e-14.}

\item{equalweights}{A logical specifying whether cluster weights in the concentration
 and assignment steps shall be considered. If \code{equalweights=TRUE} we are (ideally)
 assuming equally sized groups, else if \code{equalweights = FALSE} (default) we allow for
 different group weights. Please, check in the given references which functions
are maximized in both cases.}

\item{msg}{Controls whether or not to display messages on the screen. If \code{msg=TRUE} (default)
messages are displayed on the screen. If \code{msg=2}, detailed messages are displayed,
for example the information at iteration level.}

\item{nocheck}{Check input arguments. If \code{nocheck=TRUE} no check is performed
on matrix \code{X}. The default is \code{nocheck=FALSE}.}

\item{plot}{If \code{plot=TRUE}, a plot of the BIC (MIXMIX), ICL (MIXCLA) curve
and CLACLA is shown on the screen. The plots which are shown depend on
the input option \code{whichIC}.}

\item{startv1}{How to initialize centroids and covariance matrices. Scalar.
 If \code{startv1=1} then initial centroids and covariance matrices are based
 on \code{(p+1)} observations randomly chosen, else each centroid is initialized
 taking a random row of input data matrix and covariance matrices are initialized
 with identity matrices. The default value is \code{startv1=1}.

 Remark 1: in order to start with a routine which is in the required parameter space,
 eigenvalue restrictions are immediately applied.

Remark 2: option \code{startv1} is used only if \code{nsamp} is a scalar
 (see for more details the help associated with \code{nsamp}).}

\item{restrtype}{Type of restriction to be applied on the cluster scatter matrices.
Valid values are \code{'eigen'} (default), or \code{'deter'}.
\code{"eigen"} implies restriction on the eigenvalues while \code{"deter"}
implies restriction on the determinants.}

\item{UnitsSameGroup}{List of the units which must (whenever possible) have
a particular label. For example, \code{UnitsSameGroup=c(20, 26)} means that
the group which contains unit 20 is always labelled with number 1. Similarly,
the group which contains unit 26 is always labelled with number 2 (unless
it is found that unit 26 already belongs to group 1).
In general, the group which contains unit \code{UnitsSameGroup[r]}, where \code{r = 2, ..., length(kk)-1},
is labelled with number \code{r} (unless it is found that unit \code{UnitsSameGroup[r]}
has already been assigned to groups \code{1, 2, ..., r-1}).}

\item{numpool}{The number of parallel sessions to open. If numpool is not defined,
then it is set equal to the number of physical cores in the computer.}

\item{cleanpool}{Logical, indicating if the open pool must be closed or not.
It is useful to leave it open if there are subsequent parallel sessions to execute,
so as to save the time required to open a new pool.}

\item{trace}{Whether to print intermediate results. Default is \code{trace=FALSE}.}

\item{...}{potential further arguments passed to lower level functions.}
}
\value{
An S3 object of class \code{\link{tclustic.object}}
}
\description{
Computes the values of BIC (MIXMIX), ICL (MIXCLA) or CLA (CLACLA),
 for different values of \code{k} (number of groups) and different values of \code{c}
 (restriction factor), for a prespecified level of trimming (the last two letters in the name
 stand for 'Information Criterion'). If Parallel Computing toolbox is installed, parfor is
 used to compute \code{tclust} for different values of \code{c}. In order to minimize
 randomness, given \code{k}, the same subsets are used for each value of \code{c}.
}
\examples{
 \dontrun{
 data(geyser2)
 out <- tclustIC(geyser2, whichIC="MIXMIX", plot=FALSE, alpha=0.1)
 out
 summary(out)
 }
}
\references{
Cerioli, A., Garcia-Escudero, L.A., Mayo-Iscar, A. and Riani M. (2017).
     Finding the Number of Groups in Model-Based Clustering via Constrained Likelihoods,
     \emph{Journal of Computational and Graphical Statistics}, pp. 404-416,
     https://doi.org/10.1080/10618600.2017.1390469.
}
\seealso{
\code{\link{tclustfsda}}, \code{\link{tclustICplot}}, \code{\link{tclustICsol}}, \code{\link{carbikeplot}}
}
\author{
FSDA team, \email{valentin.todorov@chello.at}
}
