% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/estimate_predicted.R
\name{estimate_expectation}
\alias{estimate_expectation}
\alias{estimate_link}
\alias{estimate_prediction}
\alias{estimate_relation}
\title{Model-based predictions}
\usage{
estimate_expectation(
  model,
  data = NULL,
  by = NULL,
  predict = "expectation",
  ci = 0.95,
  transform = NULL,
  keep_iterations = FALSE,
  ...
)

estimate_link(
  model,
  data = "grid",
  by = NULL,
  predict = "link",
  ci = 0.95,
  transform = NULL,
  keep_iterations = FALSE,
  ...
)

estimate_prediction(
  model,
  data = NULL,
  by = NULL,
  predict = "prediction",
  ci = 0.95,
  transform = NULL,
  keep_iterations = FALSE,
  ...
)

estimate_relation(
  model,
  data = "grid",
  by = NULL,
  predict = "expectation",
  ci = 0.95,
  transform = NULL,
  keep_iterations = FALSE,
  ...
)
}
\arguments{
\item{model}{A statistical model.}

\item{data}{A data frame with model's predictors to estimate the response. If
\code{NULL}, the model's data is used. If \code{"grid"}, a data grid is created
(through \code{\link[insight:get_datagrid]{insight::get_datagrid()}}).}

\item{by}{The predictor variable(s) at which to estimate the response. Other
predictors of the model that are not included here will be set to their mean
value (for numeric predictors), reference level (for factors) or mode (other
types). The \code{by} argument will be used to create a data grid via
\code{insight::get_datagrid()}, which will then be used as \code{data} argument. Thus,
you cannot specify both \code{data} and \code{by} but only one of these two arguments.}

\item{predict}{This parameter controls what is predicted (and gets internally
passed to \code{\link[insight:get_predicted]{insight::get_predicted()}}). In most cases, you don't need to care
about it: it is changed automatically according to the different predicting
functions (i.e., \code{estimate_expectation()}, \code{estimate_prediction()}, \code{estimate_link()}
or \code{estimate_relation()}). The only time you might be interested in manually
changing it is to estimate other distributional parameters (called "dpar" in
other packages) - for instance when using complex formulae in \code{brms} models.
The \code{predict} argument can then be set to the parameter you want to
estimate, for instance \code{"sigma"}, \code{"kappa"}, etc. Note that the distinction
between \code{"expectation"}, \code{"link"} and \code{"prediction"} does not then apply (as
you are directly predicting the value of some distributional parameter), and
the corresponding functions will then only differ in the default value of
their \code{data} argument.}

\item{ci}{Confidence Interval (CI) level. Defaults to \code{0.95} (\verb{95\%}).}

\item{transform}{A function applied to predictions and confidence intervals
to (back-) transform results, which can be useful in case the regression
model has a transformed response variable (e.g., \code{lm(log(y) ~ x)}). Can also
be \code{TRUE}, in which case \code{insight::get_transformation()} is called to
determine the appropriate transformation-function. \strong{Note:} Standard errors
are not (back-) transformed!}

\item{keep_iterations}{If \code{TRUE}, will keep all iterations (draws) of
bootstrapped or Bayesian models. They will be added as additional columns
named \verb{iter_1, iter_2, ...}. You can reshape them to a long format by
running \code{\link[bayestestR:reshape_iterations]{reshape_iterations()}}.}

\item{...}{You can add all the additional control arguments from
\code{\link[insight:get_datagrid]{insight::get_datagrid()}} (used when \code{data = "grid"}) and
\code{\link[insight:get_predicted]{insight::get_predicted()}}.}
}
\value{
A data frame of predicted values and uncertainty intervals, with
class \code{"estimate_predicted"}. Methods for \code{\link[=visualisation_recipe.estimate_predicted]{visualisation_recipe()}}
and \code{\link[=visualisation_recipe.estimate_predicted]{plot()}} are available.
}
\description{
After fitting a model, it is useful to generate model-based estimates of the
response variables for different combinations of predictor values. Such
estimates can be used to make inferences about \strong{relationships} between
variables, to make predictions about individual cases, or to compare the
\strong{predicted} values against the observed data.

The \code{modelbased} package includes 4 "related" functions, that mostly differ in
their default arguments (in particular, \code{data} and \code{predict}):
\itemize{
\item \code{estimate_prediction(data = NULL, predict = "prediction", ...)}
\item \code{estimate_expectation(data = NULL, predict = "expectation", ...)}
\item \code{estimate_relation(data = "grid", predict = "expectation", ...)}
\item \code{estimate_link(data = "grid", predict = "link", ...)}
}

While they are all based on model-based predictions (using
\code{\link[insight:get_predicted]{insight::get_predicted()}}), they differ in terms of the \strong{type} of
predictions they make by default. For instance, \code{estimate_prediction()} and
\code{estimate_expectation()} return predictions for the original data used to fit
the model, while \code{estimate_relation()} and \code{estimate_link()} return
predictions on a \code{\link[insight:get_datagrid]{insight::get_datagrid()}}. Similarly, \code{estimate_link()}
returns predictions on the link scale, while the others return predictions on
the response scale. Note that the relevance of these differences depends on
the model family (for instance, for linear models, \code{estimate_relation()} is
equivalent to \code{estimate_link()}, since there is no difference between the
link-scale and the response scale).

Note that you can run \code{\link[=visualisation_recipe.estimate_predicted]{plot()}} on
the output of these functions to get some visual insights (see the
\link[=visualisation_recipe.estimate_predicted]{plotting examples}).

See the \strong{details} section below for details about the different possibilities.
}
\note{
These functions are built on top of \code{\link[insight:get_predicted]{insight::get_predicted()}} and correspond
to different specifications of its parameters. It may be useful to read its
\href{https://easystats.github.io/insight/reference/get_predicted.html}{documentation},
in particular the description of the \code{predict} argument for additional
details on the difference between expected vs. predicted values and link vs.
response scales.

Additional control parameters can be used to control results from
\code{\link[insight:get_datagrid]{insight::get_datagrid()}} (when \code{data = "grid"}) and from
\code{\link[insight:get_predicted]{insight::get_predicted()}} (the function used internally to compute
predictions).

For plotting, check the examples in \code{\link[=visualisation_recipe]{visualisation_recipe()}}. Also check out
the \href{https://easystats.github.io/modelbased/articles/}{Vignettes} and \href{https://easystats.github.io/modelbased/index.html#features}{README examples} for
various examples, tutorials and use cases.
}
\section{Expected (average) values}{


The most important way that various types of response estimates differ is in
terms of what quantity is being estimated and the meaning of the uncertainty
intervals. The major choices are \strong{expected values} for uncertainty in the
regression line and \strong{predicted values} for uncertainty in the individual
case predictions.

\strong{Expected values} refer to the fitted regression line - the estimated
\emph{average} response value (i.e., the "expectation") for individuals with
specific predictor values. For example, in a linear model \emph{y} = 2 + 3\emph{x} +
4\emph{z} + \emph{e}, the estimated average \emph{y} for individuals with \emph{x} = 1 and \emph{z} =
2 is 13.

For expected values, uncertainty intervals refer to uncertainty in the
estimated \strong{conditional average} (where might the true regression line
actually fall?). Uncertainty intervals for expected values are also called
"confidence intervals".

Expected values and their uncertainty intervals are useful for describing the
relationship between variables and for describing how precisely a model has
been estimated.

For generalized linear models, expected values are reported on one of two scales:
\itemize{
\item The \strong{link scale} refers to the scale of the fitted regression line, after
transformation by the link function. For example, for a logistic regression
(logit binomial) model, the link scale gives expected log-odds. For a
log-link Poisson model, the link scale gives the expected log-count.
\item The \strong{response scale} refers to the original scale of the response
variable (i.e., without any link function transformation). Expected values
on the link scale are back-transformed to the original response variable
metric (e.g., expected probabilities for binomial models, expected counts
for Poisson models).
}
}

\section{Individual case predictions}{


In contrast to expected values, \strong{predicted values} refer to predictions for
\strong{individual cases}. Predicted values are also called "posterior
predictions" or "posterior predictive draws".

For predicted values, uncertainty intervals refer to uncertainty in the
\strong{individual response values for each case} (where might any single case
actually fall?). Uncertainty intervals for predicted values are also called
"prediction intervals" or "posterior predictive intervals".

Predicted values and their uncertainty intervals are useful for forecasting
the range of values that might be observed in new data, for making decisions
about individual cases, and for checking if model predictions are reasonable
("posterior predictive checks").

Predicted values and intervals are always on the scale of the original
response variable (not the link scale).
}

\section{Functions for estimating predicted values and uncertainty}{


\emph{modelbased} provides 4 functions for generating model-based response
estimates and their uncertainty:
\itemize{
\item \strong{\code{estimate_expectation()}}:
\itemize{
\item Generates \strong{expected values} (conditional average) on the \strong{response scale}.
\item The uncertainty interval is a \emph{confidence interval}.
\item By default, values are computed using the data used to fit the model.
}
\item \strong{\code{estimate_link()}}:
\itemize{
\item Generates \strong{expected values} (conditional average) on the \strong{link scale}.
\item The uncertainty interval is a \emph{confidence interval}.
\item By default, values are computed using a reference grid spanning the
observed range of predictor values (see \code{\link[insight:get_datagrid]{insight::get_datagrid()}}).
}
\item \strong{\code{estimate_prediction()}}:
\itemize{
\item Generates \strong{predicted values} (for individual cases) on the \strong{response scale}.
\item The uncertainty interval is a \emph{prediction interval}.
\item By default, values are computed using the data used to fit the model.
}
\item \strong{\code{estimate_relation()}}:
\itemize{
\item Like \code{estimate_expectation()}.
\item Useful for visualizing a model.
\item Generates \strong{expected values} (conditional average) on the \strong{response scale}.
\item The uncertainty interval is a \emph{confidence interval}.
\item By default, values are computed using a reference grid spanning the
observed range of predictor values (see \code{\link[insight:get_datagrid]{insight::get_datagrid()}}).
}
}
}

\section{Data for predictions}{


If \code{data = NULL}, values are estimated using the data used to fit the
model. If \code{data = "grid"}, values are computed using a reference grid
spanning the observed range of predictor values with
\code{\link[insight:get_datagrid]{insight::get_datagrid()}}. This can be useful for model visualization. The
number of predictor values used for each variable can be controlled with the
\code{length} argument. \code{data} can also be a data frame containing columns with
names matching the model frame (see \code{\link[insight:get_data]{insight::get_data()}}). This can be used
to generate model predictions for specific combinations of predictor values.
}

\examples{
\dontshow{if (all(insight::check_if_installed(c("see", "lme4", "rstanarm"), quietly = TRUE))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
library(modelbased)

# Linear Models
model <- lm(mpg ~ wt, data = mtcars)

# Get expected values and confidence interval for the original data (see insight::get_predicted)
estimate_expectation(model)

# Get expected values with confidence interval
pred <- estimate_relation(model)
pred

# Visualisation (see visualisation_recipe())
plot(pred)

# Standardize predictions
pred <- estimate_relation(lm(mpg ~ wt + am, data = mtcars))
z <- standardize(pred, include_response = FALSE)
z
unstandardize(z, include_response = FALSE)

# Logistic Models
model <- glm(vs ~ wt, data = mtcars, family = "binomial")
estimate_expectation(model)
estimate_relation(model)

# Mixed models
model <- lme4::lmer(mpg ~ wt + (1 | gear), data = mtcars)
estimate_expectation(model)
estimate_relation(model)

# Bayesian models
\donttest{
model <- suppressWarnings(rstanarm::stan_glm(
  mpg ~ wt,
  data = mtcars, refresh = 0, iter = 200
))
estimate_expectation(model)
estimate_relation(model)
}
\dontshow{\}) # examplesIf}
}
