% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/similarities.R
\name{similarities}
\alias{similarities}
\title{Calculating similarities between classification trees}
\usage{
similarities(fulldata, treedata, Y, X, trees, m, weight = NULL, A = NULL,
  tol = NULL, regime = FALSE)
}
\arguments{
\item{fulldata}{The original full data set}

\item{treedata}{A list with data sets on which the trees in the forest were based (i.e., one data set for each tree)}

\item{Y}{A vector with the name of the outcome variable on which each tree in the forest was based}

\item{X}{The names of the predictor variables that were used as possible split variables}

\item{trees}{A list with all trees between which similarities should be computed; each tree should be stored as a party object}

\item{m}{Similarity measure that should be used to calculate similarities, where m=1 is based on counting equal predictors or predictor-split point combinations (Equation 5 or 8 in Sies & Van Mechelen, Submitted), m=2
is the measure of Shannon & Banks (1999), based on counting the number of equal paths from root node to leaves (see Sies & Van Mechelen, Submitted, Equation 2), m=3 is
based on the agreement in classification labels (Chipman, 1998), see Sies & Van Mechelen (Submitted), Equation 14, m=4
is based on the agreement of partitions (Chipman, 1998), see Sies & Van Mechelen (Submitted), Equation 13, and m=5 is based on counting equal
elementary conjunctions of trees transformed to disjunctive normal form (only for binary predictors, see Sies & Van Mechelen, Submitted, Equation 16). Finally, m=6 is based on comparing sets of predictor-split point
combinations (taking into account directions of the splits) associated with a leaf, taking into account the classification label of that leaf, see Sies & Van Mechelen (Submitted).}

\item{weight}{Indicating whether or not split points should be taken into account for m=1, where 0 means no (Equation 4 in Sies & Van Mechelen, Submitted) and 1 means yes (Equation 8 in Sies & Van Mechelen, Submitted).}

\item{A}{The name of the treatment variable in case of a forest of tree-based treatment regimes, otherwise NULL by default.}

\item{tol}{In case that weight = 1: A vector with for each predictor the tolerance zone within which two split points of the predictor in question are assumed equal. Default=NULL}

\item{regime}{Indicating whether the trees in the forest are treatment regimes (TRUE) or decision trees (FALSE). Default=FALSE}
}
\value{
\item{sim}{Similarity matrix based on the chosen similarity measure}
}
\description{
Function to calculate similarities between classification trees, based on six possible similarity measures.
}
\examples{
require(MASS)
#Grow a forest of classification trees based on 10 bootstrap samples
forest <- growforest(Pima.tr, X=c("npreg", "glu", "bp", "skin", "bmi", "ped", "age"), 
Y ="type", ntrees = 10)

# Calculate similarities between all pairs of trees in the forest
simmatrix <- similarities(fulldata = Pima.tr, treedata = forest[[2]], Y = rep("type", 10), 
X = c("npreg", "glu", "bp", "skin", "bmi", "ped", "age"), trees = forest[[1]], m = 1, weight = 0)

simmatrix2 <- similarities(fulldata = Pima.tr, treedata = forest[[2]], Y = rep("type",10), 
X = c("npreg", "glu", "bp", "skin", "bmi", "ped", "age"), trees = forest[[1]], m = 1, 
weight = 1, tol = c(3, 30, 10, 10, 5, 0.3, 10))
}
\references{
\cite{Shannon, W. D., & Banks, D. (1999). Combining classification trees using MLE. Statistics in Medicine, 18(6), 727-740.}

\cite{Chipman, H. A., George, E. I., & McCulloch, R. E. (1998). Making sense of a forest of trees. Computing Science and Statistics, 84-92.}

\cite{Sies, A., & Van Mechelen, I. (Submitted). C443: An R-package to see a forest for the trees.}
}
