% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tkmeans.R
\name{tkmeans}
\alias{tkmeans}
\alias{print.tkmeans}
\title{TKMEANS method for robust K-means clustering}
\usage{
tkmeans(
  x,
  k,
  alpha = 0.05,
  nstart = 500,
  niter1 = 3,
  niter2 = 20,
  nkeep = 5,
  iter.max,
  points = NULL,
  center = FALSE,
  scale = FALSE,
  store_x = TRUE,
  parallel = FALSE,
  n.cores = -1,
  zero_tol = 1e-16,
  drop.empty.clust = TRUE,
  trace = 0
)
}
\arguments{
\item{x}{A matrix or data.frame of dimension n x p, containing the observations (row-wise).}

\item{k}{The number of clusters initially searched for.}

\item{alpha}{The proportion of observations to be trimmed.}

\item{nstart}{The number of random initializations to be performed.}

\item{niter1}{The number of concentration steps to be performed for the nstart initializations.}

\item{niter2}{The maximum number of concentration steps to be performed for the 
\code{nkeep} solutions kept for further iteration. The concentration steps are 
stopped whenever two consecutive steps lead to the same data partition.}

\item{nkeep}{The number of iterated initializations (after \code{niter1} concentration 
steps) with the best values of the target function that are kept for further iterations.}

\item{iter.max}{(Deprecated; use the combination of \code{nkeep}, \code{niter1} and \code{niter2} instead.) 
The maximum number of concentration steps to be performed.
The concentration steps are stopped whenever two consecutive steps lead
to the same data partition.}

\item{points}{Optional initial mean vectors, \code{NULL} or a matrix with \code{k} 
vectors used as means to initialize the algorithm. If initial mean vectors are 
specified, \code{nstart} should be 1 (otherwise the same initial means are 
used for all runs).}

\item{center}{Optional centering of the data: a function or a vector of length \code{p} 
which can optionally be specified for centering \code{x} before calculation.}

\item{scale}{Optional scaling of the data: a function or a vector of length \code{p} 
which can optionally be specified for scaling \code{x} before calculation.}

\item{store_x}{A logical value, specifying whether the data matrix \code{x} shall be 
included in the result object. By default this value is set to \code{TRUE}, because 
some of the plotting functions depend on this information. However, when big data 
matrices are handled, the result object's size can be decreased noticeably 
when setting this parameter to \code{FALSE}.}

\item{parallel}{A logical value, specifying whether the nstart initializations should be done in parallel.}

\item{n.cores}{The number of cores to use when parallelizing; only taken into account if \code{parallel = TRUE}.}

\item{zero_tol}{The zero tolerance used. By default set to 1e-16.}

\item{drop.empty.clust}{Logical value specifying whether empty clusters shall be 
omitted in the resulting object. (The result structure then no longer contains center 
estimates of empty clusters. Cluster names are reassigned 
such that the first l clusters (l <= k) always have at least one observation.)}

\item{trace}{Defines the tracing level, which is set to 0 by default. Tracing level 1 
gives additional information on the stage of the iterative process.}
}
\value{
The function returns the following values:
\itemize{
    \item cluster - A numerical vector of size \code{n} containing the cluster assignment 
         for each observation. Cluster names are integer numbers from 1 to k; 0 indicates 
         trimmed observations. Note that there could be empty clusters with no observations 
         when \code{equal.weights=FALSE}.
    \item obj - The value of the objective function of the best (returned) solution.
    \item size - An integer vector of size k, returning the number of observations contained in each cluster.
    \item centers - A matrix of size p x k containing the centers (column-wise) of each cluster. 
    \item code - A numerical value indicating if the concentration steps have 
         converged for the returned solution (2).
    \item cluster.ini - A matrix with \code{nstart} rows and as many columns as 
         there are observations, where each row shows the final clustering 
         assignments (0 for trimmed observations) obtained after the \code{niter1} 
         iterations of one of the \code{nstart} random initializations.
    \item obj.ini - A numerical vector of length \code{nstart} containing the values 
         of the target function obtained after the \code{niter1} iteration of the 
         \code{nstart} random initializations.
    \item x - The input data set.
    \item k - The input number of clusters.
    \item alpha - The input trimming level.
}
}
\description{
This function searches for \code{k} (or fewer) spherical clusters 
 in a data matrix \code{x}, while the \code{ceiling(alpha * n)} most outlying 
 observations are trimmed.
}
\examples{

 \dontshow{
     set.seed(0)
 }
 ##--- EXAMPLE 1 ------------------------------------------
 sig <- diag(2)
 cen <- rep(1,2)
 x <- rbind(MASS::mvrnorm(360, cen * 0,   sig),
            MASS::mvrnorm(540, cen * 5,   sig),
            MASS::mvrnorm(100, cen * 2.5, sig))
 
 ## Two groups and 10\% trimming level
 (clus <- tkmeans(x, k = 2, alpha = 0.1))

 plot(clus)
 plot(clus, labels = "observation")
 plot(clus, labels = "cluster")

 #--- EXAMPLE 2 ------------------------------------------
 data(geyser2)
 (clus <- tkmeans(geyser2, k = 3, alpha = 0.03))
 plot(clus)
 
}
\references{
Cuesta-Albertos, J. A.; Gordaliza, A. and Matrán, C. (1997), "Trimmed k-means: 
 an attempt to robustify quantizers". Annals of Statistics, Vol. 25 (2), 553-576.
}
\author{
Valentin Todorov, Luis Angel Garcia Escudero, Agustin Mayo Iscar.
}
