% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bayes.R
\name{bayesProbabilityAssign}
\alias{bayesProbabilityAssign}
\title{Assign probabilities to one or more samples, given some training data.}
\usage{
bayesProbabilityAssign(
  dfTrain,
  dfValid,
  targetCol,
  selectedFeatureNames = c(),
  shiftAmount = 0.1,
  retainMinValues = 1,
  doEcdf = FALSE,
  online = 0,
  simple = FALSE,
  naive = FALSE,
  useParallel = NULL,
  returnProbabilityTable = FALSE
)
}
\arguments{
\item{dfTrain}{data.frame that holds the training data.}

\item{dfValid}{data.frame that holds the validation samples, for each of which
a probability is sought. The convention is that if you attempt to assign a
probability to a numeric value, it ought to be found in the target column of
this data frame (otherwise, the target column is not required in it).}

\item{targetCol}{character the name of the targeted feature, i.e., the feature
to assign a probability to.}

\item{selectedFeatureNames}{character defaults to empty vector which defaults
to using all available features. Use this to select subsets of features and to
order features.}

\item{shiftAmount}{numeric an offset value used to increase any one
probability (factor) in the full built equation.}

\item{retainMinValues}{integer to require a minimum amount of data points
when segmenting the data feature by feature.}

\item{doEcdf}{default FALSE a boolean to indicate whether to use the
empirical CDF to return a probability when inferencing a continuous
feature.}

\item{online}{default 0 integer to indicate how many rows should be used to
do inferencing. If zero, then only the initially given data.frame dfTrain is
used. If > 0, then each inferenced sample will be attached to it and the
resulting data.frame is truncated to this number. Use an integer large enough
(i.e., sum of training and validation rows) to keep all samples during
inferencing. A smaller amount than, e.g., in dfTrain will keep the amount of
data restricted, discarding older rows. A larger amount than, e.g., in dfTrain
is also fine; dfTrain will grow to it and then discard rows.}

\item{simple}{default FALSE boolean to indicate whether or not to use simple
Bayesian inferencing instead of full. This is faster but the results are less
good. If true, uses \code{mmb::bayesProbabilitySimple()}. Otherwise, uses
\code{mmb::bayesProbability()}.}

\item{naive}{default FALSE boolean to indicate whether or not to use naive
Bayesian inferencing instead of full or simple.}

\item{useParallel}{default NULL boolean; this is forwarded to the underlying
function \code{mmb::bayesProbability()} (only in \code{simple=FALSE} mode).}

\item{returnProbabilityTable}{default FALSE boolean to indicate whether to
return only the probabilities for each validation sample or whether a table
with a probability for each tested label should be returned. This has no
effect when inferencing probabilities for numeric values, as the table then
only has one column "probability". The first column of this table is always
called "rowname" and corresponds to the rownames of dfValid.}
}
\description{
This method uses full-dependency (\code{simple=F}) Bayesian
inferencing to assign a probability to the target feature in all of the
samples given in \code{dfValid}. Tests each sample using
\code{mmb::bayesProbability()} or \code{mmb::bayesProbabilitySimple()}.
It mostly forwards the given arguments to these functions, and you will find
good documentation there.
}
\examples{
w <- mmb::getWarnings()
mmb::setWarnings(FALSE)

set.seed(84735)
rn <- base::sample(rownames(iris), 150)
dfTrain <- iris[rn[1:120], ]
dfValid <- iris[rn[121:150], !(colnames(iris) \%in\% "Species") ]
mmb::bayesProbabilityAssign(dfTrain, dfValid, "Species")

mmb::setWarnings(w)
}
\references{
\insertRef{bayes1763lii}{mmb}
}
\author{
Sebastian Hönel \href{mailto:sebastian.honel@lnu.se}{sebastian.honel@lnu.se}
}
\keyword{classification}
\keyword{full-dependency}
\keyword{inferencing}
