% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/PipeOpColApply.R
\docType{data}
\name{mlr_pipeops_colapply}
\alias{mlr_pipeops_colapply}
\alias{PipeOpColApply}
\title{PipeOpColApply}
\format{\code{\link{R6Class}} object inheriting from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}.}
\description{
Applies a function to each column of a task. Use the \code{affect_columns} parameter inherited from
\code{\link{PipeOpTaskPreproc}} to limit the columns this function should be applied to. This can be used
for simple feature transformations or type conversions (e.g. \code{as.numeric}).

The same function is applied during training and prediction. One important requirement for
machine learning preprocessing is that during the prediction phase, the preprocessing on each
data row should be independent of other rows. Therefore, the \code{applicator} function should always
return a vector / list where each result component only depends on the corresponding input component and
not on other components. As a rule of thumb, if the function \code{f} generates output different
from \code{Vectorize(f)}, it is not a function that should be used for \code{applicator}.
}
\section{Construction}{
\preformatted{PipeOpColApply$new(id = "colapply", param_vals = list())
}
\itemize{
\item \code{id} :: \code{character(1)}\cr
Identifier of resulting object, default \code{"colapply"}.
\item \code{param_vals} :: named \code{list}\cr
List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default \code{list()}.
}
}

\section{Input and Output Channels}{

Input and output channels are inherited from \code{\link{PipeOpTaskPreproc}}.

The output is the input \code{\link[mlr3:Task]{Task}} with features changed according to the \code{applicator} parameter.
}

\section{State}{

The \code{$state} is a named \code{list} with the \code{$state} elements inherited from \code{\link{PipeOpTaskPreproc}}, as well as:
\itemize{
\item \code{emptydt} :: \code{\link{data.table}}\cr
An empty \code{\link{data.table}} with the column names and types of the \emph{output} features after training. This is used
to produce a correct type conversion during prediction, even when the input has zero length and
\code{applicator} is therefore not called.
}
}

\section{Parameters}{

The parameters are the parameters inherited from \code{\link{PipeOpTaskPreproc}}, as well as:
\itemize{
\item \code{applicator} :: \code{function}\cr
Function to apply to each column of the task. The return value must have the
same length as the input, i.e. the function must vectorize over its input. A typical example would be \code{as.numeric}.
Use \code{\link[base:Vectorize]{Vectorize}} to create a vectorizing function from any function that
ordinarily only takes one element input.\cr
The \code{applicator} is not called during prediction if the input task has no rows; instead the
types of affected features are changed to the result types of the \code{applicator} call during training.
Initialized to the \code{identity()}-function.
}
}

\section{Internals}{

\code{\link{PipeOpColApply}} cannot inherit from \code{\link{PipeOpTaskPreprocSimple}}, because if \code{applicator} is given
and the prediction data has 0 rows, then the resulting \code{\link{data.table}} does not know
what the column types should be. Column type conformity between training and prediction is enforced
by simply saving a copy of an empty \code{\link{data.table}} in the \code{$state$emptydt} slot.
}

\section{Fields}{

Only fields inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}.
}

\section{Methods}{

Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}.
}

\examples{
library("mlr3")

# Build the iris task and a "colapply" PipeOp whose applicator converts
# every affected feature column with as.character.
task = tsk("iris")
poca = po("colapply", applicator = as.character)
poca$train(list(task))[[1]]  # types are converted

# function that does not vectorize
f = function(x) {
  # we could use `ifelse` here, but that is not the point
  # `if` evaluates a single condition, so f is meant for one element at a time
  if (x > 1) {
    "a"
  } else {
    "b"
  }
}
# Wrap f with Vectorize so that it is applied element by element and the
# result has the same length as the input column, as applicator requires.
poca$param_set$values$applicator = Vectorize(f)
poca$train(list(task))[[1]]$data()

# only affect Petal.* columns:
# (affect_columns restricts which columns the applicator is applied to)
poca$param_set$values$affect_columns = selector_grep("^Petal")
poca$train(list(task))[[1]]$data()
}
\seealso{
Other PipeOps: \code{\link{PipeOpEnsemble}},
  \code{\link{PipeOpImpute}},
  \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}},
  \code{\link{mlr_pipeops_boxcox}},
  \code{\link{mlr_pipeops_branch}},
  \code{\link{mlr_pipeops_chunk}},
  \code{\link{mlr_pipeops_classbalancing}},
  \code{\link{mlr_pipeops_classifavg}},
  \code{\link{mlr_pipeops_classweights}},
  \code{\link{mlr_pipeops_collapsefactors}},
  \code{\link{mlr_pipeops_copy}},
  \code{\link{mlr_pipeops_encodeimpact}},
  \code{\link{mlr_pipeops_encodelmer}},
  \code{\link{mlr_pipeops_encode}},
  \code{\link{mlr_pipeops_featureunion}},
  \code{\link{mlr_pipeops_filter}},
  \code{\link{mlr_pipeops_fixfactors}},
  \code{\link{mlr_pipeops_histbin}},
  \code{\link{mlr_pipeops_ica}},
  \code{\link{mlr_pipeops_imputehist}},
  \code{\link{mlr_pipeops_imputemean}},
  \code{\link{mlr_pipeops_imputemedian}},
  \code{\link{mlr_pipeops_imputenewlvl}},
  \code{\link{mlr_pipeops_imputesample}},
  \code{\link{mlr_pipeops_kernelpca}},
  \code{\link{mlr_pipeops_learner}},
  \code{\link{mlr_pipeops_missind}},
  \code{\link{mlr_pipeops_modelmatrix}},
  \code{\link{mlr_pipeops_mutate}},
  \code{\link{mlr_pipeops_nop}},
  \code{\link{mlr_pipeops_pca}},
  \code{\link{mlr_pipeops_quantilebin}},
  \code{\link{mlr_pipeops_regravg}},
  \code{\link{mlr_pipeops_removeconstants}},
  \code{\link{mlr_pipeops_scalemaxabs}},
  \code{\link{mlr_pipeops_scalerange}},
  \code{\link{mlr_pipeops_scale}},
  \code{\link{mlr_pipeops_select}},
  \code{\link{mlr_pipeops_smote}},
  \code{\link{mlr_pipeops_spatialsign}},
  \code{\link{mlr_pipeops_subsample}},
  \code{\link{mlr_pipeops_unbranch}},
  \code{\link{mlr_pipeops_yeojohnson}},
  \code{\link{mlr_pipeops}}
}
\concept{PipeOps}
\keyword{datasets}
