% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/FFTrees.R, R/helper.R
\docType{package}
\name{FFTrees}
\alias{FFTrees}
\title{Main function to create and apply fast-and-frugal trees (FFTs)}
\usage{
FFTrees(
  formula = NULL,
  data = NULL,
  data.test = NULL,
  algorithm = "ifan",
  max.levels = NULL,
  sens.w = 0.5,
  cost.outcomes = NULL,
  cost.cues = NULL,
  stopping.rule = "exemplars",
  stopping.par = 0.1,
  goal = NULL,
  goal.chase = NULL,
  goal.threshold = "bacc",
  numthresh.method = "o",
  numthresh.n = 10,
  decision.labels = c("False", "True"),
  main = NULL,
  train.p = 1,
  rounding = NULL,
  repeat.cues = TRUE,
  my.tree = NULL,
  tree.definitions = NULL,
  do.comp = TRUE,
  do.cart = TRUE,
  do.lr = TRUE,
  do.rf = TRUE,
  do.svm = TRUE,
  object = NULL,
  force = FALSE,
  quiet = FALSE,
  comp = NULL,
  rank.method = NULL,
  store.data = NULL,
  verbose = NULL
)
}
\arguments{
\item{formula}{formula. A \code{\link{formula}} specifying a binary criterion variable (as logical) as a function of 1 or more predictor variables (cues).}

\item{data}{dataframe. A training dataset.}

\item{data.test}{dataframe. An optional testing dataset with the same structure as data.}

\item{algorithm}{character. The algorithm used to create FFTs. Can be \code{'ifan'} or \code{'dfan'}.}

\item{max.levels}{integer. The maximum number of levels considered for the trees. Because all permutations of exit structures are considered, the larger \code{max.levels} is, the more trees will be created.}

\item{sens.w}{numeric. A number from 0 to 1 indicating how to weight sensitivity relative to specificity. Only relevant when \code{goal = 'wacc'}.}

\item{cost.outcomes}{A list of length 4 with names \code{'hi'}, \code{'fa'}, \code{'mi'}, and \code{'cr'} specifying the costs of a hit, false alarm, miss, and correct rejection, respectively.
E.g., \code{cost.outcomes = list("hi" = 0, "fa" = 10, "mi" = 20, "cr" = 0)} means that a false alarm and miss cost 10 and 20, respectively, while correct decisions have no costs.}

\item{cost.cues}{A list containing costs for each cue.
Each element should have a name corresponding to a column in \code{data}, and each entry should be a single (positive) number.
Cues not present in \code{cost.cues} are assumed to have no costs (i.e., a value of 0).}

\item{stopping.rule}{character. A string indicating the method to stop growing trees.
\code{"levels"} means the tree grows until a certain level;
\code{"exemplars"} means the tree grows until a certain number of unclassified exemplars remain;
\code{"statdelta"} means the tree grows until the change in the criterion statistic is less than a specified level.}

\item{stopping.par}{numeric. A number indicating the parameter for the stopping rule.
For \code{stopping.rule = "levels"}, this is the number of levels.
For \code{stopping.rule = "exemplars"}, this is the smallest percentage of exemplars allowed in the last level.}

\item{goal}{character. A string indicating the statistic to maximize when selecting final trees: \code{"acc"} = overall accuracy, \code{"bacc"} = balanced accuracy, \code{"wacc"} = weighted accuracy.}

\item{goal.chase}{character. A string indicating the statistic to maximize when constructing trees: \code{"acc"} = overall accuracy, \code{"bacc"} = balanced accuracy, \code{"wacc"} = weighted accuracy, \code{"cost"} = cue costs.}

\item{goal.threshold}{character. A string indicating the statistic to maximize when calculating cue thresholds: \code{"acc"} = overall accuracy, \code{"bacc"} = balanced accuracy, \code{"wacc"} = weighted accuracy.
Default: \code{goal.threshold = "bacc"}.}

\item{numthresh.method}{character. How should thresholds for numeric cues be determined? \code{"o"} will optimize thresholds, while \code{"m"} will always use the median.}

\item{numthresh.n}{integer. Number of numeric thresholds to try.}

\item{decision.labels}{string. A vector of strings of length 2 indicating labels for negative and positive cases. E.g., \code{decision.labels = c("Healthy", "Diseased")}.}

\item{main}{string. An optional label for the dataset. Passed on to other functions, like \code{\link{plot.FFTrees}}, and \code{\link{print.FFTrees}}.}

\item{train.p}{numeric. What percentage of the data to use for training when \code{data.test} is not specified? For example, \code{train.p = .5} will randomly split \code{data} into a 50\% training set and a 50\% test set. \code{train.p = 1}, the default, uses all data for training.}

\item{rounding}{integer. An integer indicating digit rounding for non-integer numeric cue thresholds. The default is NULL which means no rounding. A value of 0 rounds all possible thresholds to the nearest integer, 1 rounds to the nearest .1 (etc.).}

\item{repeat.cues}{logical. Can cues occur multiple times within a tree?}

\item{my.tree}{string. A string representing a verbal description of an FFT, i.e., an FFT in words.
For example, \code{my.tree = "If age > 20, predict TRUE. If sex = {m}, predict FALSE. Otherwise, predict TRUE."}}

\item{tree.definitions}{dataframe. An optional hard-coded definition of trees (see details below). If specified, no new trees are created.}

\item{do.comp, do.cart, do.lr, do.rf, do.svm}{logical. Should alternative algorithms be created for comparison? All TRUE by default. Options are:
\code{cart} = regular (non-frugal) trees with \strong{rpart};
\code{lr} = logistic regression with \strong{glm};
\code{rf} = random forests with \strong{randomForest};
\code{svm} = support vector machines with \strong{e1071}.
Specifying \code{do.comp = FALSE} sets all these arguments to \code{FALSE}.}

\item{object}{FFTrees. An optional existing \code{FFTrees} object. When specified, no new trees are fitted and the existing trees are applied to \code{data} and \code{data.test}.}

\item{force}{logical. Setting \code{force = TRUE} forces some parameters (like goal) to be as specified by the user even when the algorithm thinks those specifications don't make sense. Default is \code{force = FALSE}.}

\item{quiet}{logical. Should progress reports be suppressed? Default is \code{quiet = FALSE} (i.e., show progress), which can be helpful for diagnosis when the function is running slowly.}

\item{comp, rank.method, store.data, verbose}{Deprecated arguments (unused or replaced, to be retired in future releases).}
}
\value{
An \code{FFTrees} object with the following elements:
\describe{
  \item{criterion_name}{The name of the binary criterion variable (as character).}
  \item{cue_names}{The names of all potential predictor variables (cues) in the data (as character).}
  \item{formula}{The \code{\link{formula}} specified when creating the FFTs.}
  \item{trees}{A list of FFTs created, with further details contained in \code{n}, \code{best}, \code{definitions}, \code{inwords}, \code{stats}, \code{level_stats}, and \code{decisions}.}
  \item{data}{The original training and test data (if available).}
  \item{params}{A list of defined control parameters (e.g., \code{algorithm}, \code{goal}).}
  \item{competition}{Models and classification statistics for competitive classification algorithms: logistic regression, CART, random forests, and support vector machines.}
  \item{cues}{A list of cue information, with further details contained in \code{thresholds} and \code{stats}.}
}
}
\description{
\code{FFTrees} is the workhorse function of the \strong{FFTrees} package for creating fast-and-frugal trees (FFTs).

FFTs are decision algorithms for solving binary classification tasks, i.e., they predict the values of a binary criterion variable based on one or more predictor variables (cues).

Using \code{FFTrees} on \code{data} usually generates a range of FFTs and corresponding summary statistics (as an \code{FFTrees} object)
that can then be printed, plotted, and examined further.

The criterion and predictor variables are specified in \code{\link{formula}} notation.
Based on the settings of \code{data} and \code{data.test}, FFTs are trained on a (required) training dataset and tested on an (optional) test dataset.

Create fast-and-frugal trees (FFTs).
}
\examples{

# Create fast-and-frugal trees (FFTs) for heart disease:
heart.fft <- FFTrees(formula = diagnosis ~ .,
                     data = heart.train,
                     data.test = heart.test,
                     main = "Heart Disease",
                     decision.labels = c("Healthy", "Diseased")
                     )

# Print a summary of the result:
heart.fft

# Plot an FFT applied to training data:
plot(heart.fft, stats = FALSE)
plot(heart.fft)

# Apply FFT to (new) testing data:
plot(heart.fft, data = "test")
plot(heart.fft, data = "test", tree = 2)  # Plot Tree #2

# Predict classes and probabilities for new data:
predict(heart.fft, newdata = heartdisease)
predict(heart.fft, newdata = heartdisease, type = "prob")

# Create custom trees with my.tree:
custom.fft <- FFTrees(
  formula = diagnosis ~ .,
  data = heartdisease,
  my.tree = "If chol > 300, predict True.
             If sex = {m}, predict False,
             If age > 70, predict True, otherwise predict False."
             )

# Plot the (pretty terrible) custom tree:
plot(custom.fft)

}
\seealso{
\code{\link{print.FFTrees}} for printing FFTs;
\code{\link{plot.FFTrees}} for plotting FFTs;
\code{\link{summary.FFTrees}} for summarizing FFTs;
\code{\link{inwords}} for obtaining a verbal description of FFTs;
\code{\link{showcues}} for plotting cue accuracies.
}
