% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/safe_extraction.R
\name{safe_extraction}
\alias{safe_extraction}
\title{Creating SAFE Extractor - an Object Used for Surrogate-Assisted Feature Extraction}
\usage{
safe_extraction(
  explainer,
  response_type = "ale",
  grid_points = 50,
  N = 200,
  penalty = "MBIC",
  nquantiles = 10,
  no_segments = 2,
  method = "complete",
  B = 500,
  collapse = "_",
  interactions = FALSE,
  inter_param = 0.25,
  inter_threshold = 0.25,
  verbose = TRUE
)
}
\arguments{
\item{explainer}{DALEX explainer created with the explain() function}

\item{response_type}{character, type of response to be calculated, one of: "pdp", "ale".
If features are uncorrelated, one can use "pdp" type - otherwise "ale" is strongly recommended.}

\item{grid_points}{number of points on x-axis used for creating the PD/ALE plot, default 50}

\item{N}{number of observations from the dataset used for creating the PD/ALE plot, default 200}

\item{penalty}{penalty for introducing another changepoint,
one of "AIC", "BIC", "SIC", "MBIC", "Hannan-Quinn" or a non-negative numeric value}

\item{nquantiles}{the number of quantiles used in integral approximation}

\item{no_segments}{numeric, the number of segments a variable is to be divided into when no breakpoints are found}

\item{method}{the agglomeration method to be used in hierarchical clustering, one of:
"ward.D", "ward.D2", "single", "complete", "average", "mcquitty", "median", "centroid"}

\item{B}{number of reference datasets used to calculate gap statistics}

\item{collapse}{a character string used to separate original levels when combining them into a new one}

\item{interactions}{logical, whether interactions between variables are to be taken into account}

\item{inter_param}{numeric, a positive value indicating which single-observation non-additive effects
are to be regarded as significant; the higher the value, the larger a non-additive effect has to be
to be taken into account}

\item{inter_threshold}{numeric, a value from the \verb{[0,1]} interval indicating which interactions should be returned
as significant. It corresponds to the fraction of observations for which the interaction measure is greater
than inter_param - if this fraction is less than inter_threshold then the interaction effect is ignored.}

\item{verbose}{logical, whether a progress bar is to be printed}
}
\value{
safe_extractor object containing information about variable transformations
}
\description{
The safe_extraction() function creates a SAFE-extractor object which may be used later
for surrogate feature extraction.
}
\examples{

library(DALEX)
library(randomForest)
library(rSAFE)

data <- apartments[1:500,]
set.seed(111)
model_rf <- randomForest(m2.price ~ construction.year + surface + floor +
                           no.rooms + district, data = data)
explainer_rf <- explain(model_rf, data = data[,2:6], y = data[,1], verbose = FALSE)
safe_extractor <- safe_extraction(explainer_rf, grid_points = 30, N = 100, verbose = FALSE)
print(safe_extractor)
plot(safe_extractor, variable = "construction.year")

}
\seealso{
\code{\link{safely_transform_categorical}}, \code{\link{safely_transform_continuous}}, \code{\link{safely_detect_interactions}}, \code{\link{safely_transform_data}}
}
