% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/brif.R
\name{brif.default}
\alias{brif.default}
\title{Build a model taking a data frame as input}
\usage{
\method{brif}{default}(
  x,
  n_numeric_cuts = 31,
  n_integer_cuts = 31,
  max_integer_classes = 20,
  max_depth = 20,
  min_node_size = 1,
  ntrees = 200,
  ps = 0,
  max_factor_levels = 30,
  seed = 0,
  bagging_method = 0,
  bagging_proportion = 0.9,
  split_search = 4,
  search_radius = 5,
  verbose = 0,
  nthreads = 2,
  ...
)
}
\arguments{
\item{x}{a data frame containing the training data set. The first column is taken as the target variable and all other columns are used as predictors.}

\item{n_numeric_cuts}{an integer value indicating the maximum number of split points to generate for each numeric variable.}

\item{n_integer_cuts}{an integer value indicating the maximum number of split points to generate for each integer variable.}

\item{max_integer_classes}{an integer value. If the target variable is integer and has more than max_integer_classes unique values in the training data, then the target variable will be grouped into max_integer_classes bins. If the target variable is numeric, then it will be grouped into a number of bins equal to the smaller of max_integer_classes and the number of unique values, and the regression problem will be solved as a classification problem.}

\item{max_depth}{an integer specifying the maximum depth of each tree. Maximum is 40.}

\item{min_node_size}{an integer specifying the minimum number of training cases a leaf node must contain.}

\item{ntrees}{an integer specifying the number of trees in the forest.}

\item{ps}{an integer indicating the number of predictors to sample at each node split. Default is 0, meaning to use sqrt(p), where p is the number of predictors in the input.}

\item{max_factor_levels}{an integer. If any factor variable has more than max_factor_levels levels, the program stops and prompts the user to increase the value of this parameter if a factor with that many levels is indeed intended.}

\item{seed}{an integer specifying the seed used by the internal random number generator. Default is 0, meaning not to set a seed but to accept the set seed from the calling environment.}

\item{bagging_method}{an integer indicating the bagging sampling method: 0 for sampling without replacement; 1 for sampling with replacement (bootstrapping).}

\item{bagging_proportion}{a numeric scalar between 0 and 1, indicating the proportion of training observations to be used in each tree.}

\item{split_search}{an integer indicating the choice of the split search method. 0: randomly pick a split point; 1: do a local search; 2: random pick subject to regulation; 3: local search subject to regulation; 4 or above: a mix of options 0 to 3.}

\item{search_radius}{a positive integer indicating the split point search radius. This parameter takes effect only in the self-regulating local search (split_search = 2 or above).}

\item{verbose}{an integer (0 or 1) specifying the verbose level.}

\item{nthreads}{an integer specifying the number of threads used by the program. This parameter takes effect only on systems supporting OpenMP.}

\item{...}{additional arguments.}
}
\value{
an object of class \code{brif}, which is a list containing the following components. Note: this object is not intended for any use other than that by the function \code{\link[brif]{predict.brif}}. Do not apply the \code{\link[utils]{str}} function on this object because the output can be long and meaningless especially when ntrees is large. Use \code{\link[base]{summary}} to get a peek at its structure. Use \code{\link[brif]{printRules}} to print out the decision rules of a particular tree. Most of the data in the object is stored in the tree_leaves element (which is itself a list of lists) of this list.
\item{p}{an integer scalar, the number of variables (predictors) used in the model}
\item{var_types}{a character vector of length (p+1) containing the variable types, including that of the target variable as its first element}
\item{var_labels}{a character vector of length (p+1) containing the variable names, including the target variable name as its first element}
\item{n_bcols}{an integer vector of length (p+1), containing the numbers of binary columns generated for each variable}
\item{ntrees}{an integer scalar indicating the number of trees in the model}
\item{index_in_group}{an integer vector specifying the internal index, for each variable, in its type group}
\item{numeric_cuts}{a list containing split point information on numeric variables}
\item{integer_cuts}{a list containing split point information on integer variables}
\item{factor_cuts}{a list containing split point information on factor variables}
\item{n_num_vars}{an integer scalar indicating the number of numeric variables in the model}
\item{n_int_vars}{an integer scalar indicating the number of integer variables in the model}
\item{n_fac_vars}{an integer scalar indicating the number of factor variables in the model}
\item{tree_leaves}{a list containing all the leaves in the forest}
\item{yc}{a list containing the target variable encoding scheme}
}
\description{
Build a model taking a data frame as input
}
