% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/amova.r
\name{poppr.amova}
\alias{amova}
\alias{poppr.amova}
\title{Perform Analysis of Molecular Variance (AMOVA) on genind or genclone objects.}
\usage{
poppr.amova(x, hier = NULL, clonecorrect = FALSE, within = TRUE,
  dist = NULL, squared = TRUE, correction = "quasieuclid", sep = "_",
  filter = FALSE, threshold = 0, algorithm = "farthest_neighbor",
  missing = "loci", cutoff = 0.05, quiet = FALSE, method = c("ade4",
  "pegas"), nperm = 0)
}
\arguments{
\item{x}{a \code{\linkS4class{genind}} or \code{\linkS4class{genclone}}
object}

\item{hier}{a hierarchical \code{\link{formula}} that defines your population
hierarchy. (e.g.: ~Population/Subpopulation). \strong{See Details below.}}

\item{clonecorrect}{\code{logical} if \code{TRUE}, the data set will be clone
corrected with respect to the lowest level of the hierarchy. The default is
set to \code{FALSE}. See \code{\link{clonecorrect}} for details.}

\item{within}{\code{logical}. When this is set to \code{TRUE} (Default),
variance within individuals is calculated as well. If this is set to
\code{FALSE}, the lowest level of the hierarchy will be the sample level.
See Details below.}

\item{dist}{an optional distance matrix calculated on your data. If this is
set to \code{NULL} (default), the raw pairwise distances will be calculated
via \code{\link{diss.dist}}.}

\item{squared}{if a distance matrix is supplied, this indicates whether or
not it represents squared distances.}

\item{correction}{a \code{character} defining the correction method for 
non-euclidean distances. Options are \code{\link[ade4]{quasieuclid}}
(Default), \code{\link[ade4]{lingoes}}, and \code{\link[ade4]{cailliez}}.
See Details below.}

\item{sep}{Deprecated. As of poppr version 2, this argument serves no purpose.}

\item{filter}{\code{logical} When set to \code{TRUE}, mlg.filter will be run 
to determine genotypes from the distance matrix. It defaults to 
\code{FALSE}. You can set the parameters with \code{algorithm} and
\code{threshold} arguments. Note that this will not be performed when 
\code{within = TRUE}. Note that the threshold should be the number of
allowable substitutions if you don't supply a distance matrix.}

\item{threshold}{a number indicating the minimum distance two MLGs must be
separated by to be considered different. Defaults to 0, which will reflect
the original (naive) MLG definition.}

\item{algorithm}{determines the type of clustering to be done. 
\describe{
  \item{"farthest_neighbor"}{\emph{ (default) }merges clusters based on the 
  maximum distance between points in either cluster. This is the strictest of
  the three.}
  \item{"nearest_neighbor"}{ merges clusters based on the minimum distance
  between points in either cluster. This is the loosest of the three.}
  \item{"average_neighbor"}{ merges clusters based on the average distance
  between every pair of points between clusters.}
}}

\item{missing}{specify method of correcting for missing data utilizing
options given in the function \code{\link{missingno}}. Default is
\code{"loci"}.}

\item{cutoff}{specify the level at which missing data should be
removed/modified. See \code{\link{missingno}} for details.}

\item{quiet}{\code{logical} If \code{FALSE} (Default), messages regarding any
corrections will be printed to the screen. If \code{TRUE}, no messages will
be printed.}

\item{method}{Which method for calculating AMOVA should be used? Choices 
refer to package implementations: "ade4" (default) or "pegas". See details
for differences.}

\item{nperm}{the number of permutations passed to the pegas implementation of
amova.}
}
\value{
a list of class \code{amova} from the ade4 package. See 
  \code{\link[ade4]{amova}} for details.
}
\description{
This function simplifies the process necessary for performing AMOVA in R. It
gives the user the choice of utilizing either the \pkg{ade4} or the \pkg{pegas}
implementation of AMOVA. See \code{\link[ade4]{amova}} (ade4) and
\code{\link[pegas]{amova}} (pegas) for details on the specific
implementation.
}
\details{
The poppr implementation of AMOVA is a very detailed wrapper for the
  ade4 implementation. The output is an \code{\link[ade4]{amova}} class list 
  that contains the results in the first four elements. The inputs are
  contained in the last three elements. The inputs required for the ade4
  implementation are:
  \enumerate{
  \item a distance matrix on all unique genotypes (haplotypes)
  \item a data frame defining the hierarchy of the distance matrix 
  \item  a genotype (haplotype) frequency table.} 
  All of this data can be constructed from a \code{\linkS4class{genind}}
  object, but can be daunting for a novice R user. \emph{This function
  automates the entire process}. Since there are many variables regarding
  genetic data, some points need to be highlighted:
  
  \subsection{On Hierarchies:}{The hierarchy is defined by different 
  population strata that separate your data hierarchically. These strata are
  defined in the \strong{strata} slot of \code{\linkS4class{genind}} and
  \code{\linkS4class{genclone}} objects. They are useful for defining the
  population factor for your data. See the function \code{\link{strata}} for
  details on how to properly define these strata.}

  \subsection{On Within Individual Variance:}{ Heterozygosities within
  diploid genotypes are sources of variation from within individuals and can
  be quantified in AMOVA. When \code{within = TRUE}, poppr will split diploid
  genotypes into haplotypes and use those to calculate within-individual 
  variance. No estimation of phase is made. This acts much like the default 
  settings for AMOVA in the Arlequin software package. Within individual 
  variance will not be calculated for haploid individuals or dominant 
  markers.}

  \subsection{On Euclidean Distances:}{ AMOVA, as defined by
  Excoffier et al., utilizes an absolute genetic distance measured in the
  number of differences between two samples across all loci. With the ade4
  implementation of AMOVA (utilized by poppr), distances must be Euclidean
  (due to the nature of the calculations). Unfortunately, many genetic
  distance measures are not always Euclidean and must be corrected before
  being analyzed. Poppr automates this with three methods implemented in 
  ade4: \code{\link[ade4]{quasieuclid}}, \code{\link[ade4]{lingoes}}, and 
  \code{\link[ade4]{cailliez}}. The correction of these distances should not 
  adversely affect the outcome of the analysis.}
  
  \subsection{On Filtering:}{ Filtering multilocus genotypes is performed by
  \code{\link{mlg.filter}}. This can only be done for AMOVA tests 
  that do not account for within-individual variance. The distance matrix used
  to calculate the amova is derived from using \code{\link{mlg.filter}} with
  the option \code{stats = "distance"}, which reports the distance between 
  multilocus genotype clusters. One useful way to utilize this feature is to
  correct for genotypes that have equivalent distance due to missing data. 
  (See example below.)}
  
  \subsection{On Methods:}{ Both \pkg{ade4} and \pkg{pegas} have 
  implementations of AMOVA, both of which are appropriately called "amova". 
  The ade4 version is faster, but there have been questions raised as to the 
  validity of the code utilized. The pegas version is slower, but careful 
  measures have been implemented as to the accuracy of the method. It must be
  noted that there appears to be a bug regarding permuting analyses where 
  within individual variance is accounted for (\code{within = TRUE}) in the 
  pegas implementation. If you want to perform permutation analyses on the 
  pegas implementation, you must set \code{within = FALSE}. In addition,
  while clone correction is implemented for both methods, filtering is only
  implemented for the ade4 version.}
}
\note{
The ade4 function \code{\link[ade4]{randtest.amova}} contains a slight 
  bug as of version 1.7.4 which causes the wrong alternative hypothesis to be
  applied on every 4th hierarchical level. Luckily, there is a way to fix it
  by re-converting the results with the function
  \code{\link[ade4]{as.krandtest}}. See examples for details.
}
\examples{
data(Aeut)
strata(Aeut) <- other(Aeut)$population_hierarchy[-1]
agc <- as.genclone(Aeut)
agc
amova.result <- poppr.amova(agc, ~Pop/Subpop)
amova.result
amova.test <- randtest(amova.result) # Test for significance
plot(amova.test)
amova.test

\dontrun{

# You can get the same results with the pegas implementation
amova.pegas <- poppr.amova(agc, ~Pop/Subpop, method = "pegas")
amova.pegas
amova.pegas$varcomp/sum(amova.pegas$varcomp)

# Clone correction is possible
amova.cc.result <- poppr.amova(agc, ~Pop/Subpop, clonecorrect = TRUE)
amova.cc.result
amova.cc.test <- randtest(amova.cc.result)
plot(amova.cc.test)
amova.cc.test


# Example with filtering
data(monpop)
splitStrata(monpop) <- ~Tree/Year/Symptom
poppr.amova(monpop, ~Symptom/Year) # gets a warning of zero distances
poppr.amova(monpop, ~Symptom/Year, filter = TRUE, threshold = 0.1) # no warning

# Correcting incorrect alternate hypotheses with >2 hierarchical levels
# 
mon.amova <- poppr.amova(monpop, ~Symptom/Year/Tree)
mon.test  <- randtest(mon.amova)
mon.test # Note alter is less, greater, greater, less
alt <- c("less", "greater", "greater", "greater") # extend this to the number of levels
with(mon.test, as.krandtest(sim, obs, alter = alt, call = call, names = names))

}
}
\references{
Excoffier, L., Smouse, P.E. and Quattro, J.M. (1992) Analysis of
molecular variance inferred from metric distances among DNA haplotypes:
application to human mitochondrial DNA restriction data. \emph{Genetics},
\strong{131}, 479-491.
}
\seealso{
\code{\link[ade4]{amova}} (ade4) \code{\link[pegas]{amova}} (pegas) 
  \code{\link{clonecorrect}} \code{\link{diss.dist}} \code{\link{missingno}} 
  \code{\link[ade4]{is.euclid}} \code{\link{strata}}
}
\keyword{amova}

