% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/stat-poly-eq.R
\name{stat_poly_eq}
\alias{stat_poly_eq}
\title{Equation, p-value, \eqn{R^2}, AIC and BIC of fitted polynomial}
\usage{
stat_poly_eq(
  mapping = NULL,
  data = NULL,
  geom = "text_npc",
  position = "identity",
  ...,
  method = "lm",
  method.args = list(),
  formula = NULL,
  eq.with.lhs = TRUE,
  eq.x.rhs = NULL,
  small.r = FALSE,
  small.p = FALSE,
  CI.brackets = c("[", "]"),
  rsquared.conf.level = 0.95,
  coef.digits = 3,
  coef.keep.zeros = TRUE,
  rr.digits = 2,
  f.digits = 3,
  p.digits = 3,
  label.x = "left",
  label.y = "top",
  label.x.npc = NULL,
  label.y.npc = NULL,
  hstep = 0,
  vstep = NULL,
  output.type = NULL,
  na.rm = FALSE,
  orientation = NA,
  parse = NULL,
  show.legend = FALSE,
  inherit.aes = TRUE
)
}
\arguments{
\item{mapping}{The aesthetic mapping, usually constructed with
\code{\link[ggplot2]{aes}}. Only needs to be
set at the layer level if you are overriding the plot defaults.}

\item{data}{A layer specific dataset, only needed if you want to override
the plot defaults.}

\item{geom}{The geometric object to use to display the data}

\item{position}{The position adjustment to use for overlapping points on this
layer}

\item{...}{other arguments passed on to \code{\link[ggplot2]{layer}}. This
can include aesthetics whose values you want to set, not map. See
\code{\link[ggplot2]{layer}} for more details.}

\item{method}{function or character If character, "lm", "rlm" or the name of
a model fit function are accepted, possibly followed by the fit function's
\code{method} argument separated by a colon (e.g. \code{"rlm:M"}). If a
function different to \code{lm()}, it must accept as a minimum a model
formula through its first parameter, and have formal parameters named
\code{data}, \code{weights}, and \code{method}, and return a model fit
object of class \code{lm}.}

\item{method.args}{named list with additional arguments.}

\item{formula}{a formula object. Using aesthetic names \code{x} and \code{y}
instead of original variable names.}

\item{eq.with.lhs}{If \code{character} the string is pasted to the front of
the equation label before parsing; alternatively a \code{logical} (see note).}

\item{eq.x.rhs}{\code{character} this string will be used as replacement for
\code{"x"} in the model equation when generating the label before parsing
it.}

\item{small.r, small.p}{logical Flags to switch use of lower case r and p for
coefficient of determination and p-value.}

\item{CI.brackets}{character vector of length 2. The opening and closing
brackets used for the CI label.}

\item{rsquared.conf.level}{numeric Confidence level for the returned
confidence interval.}

\item{coef.digits, f.digits}{integer Number of significant digits to use for
the fitted coefficients and F-value.}

\item{coef.keep.zeros}{logical Keep or drop trailing zeros when formatting
the fitted coefficients and F-value.}

\item{rr.digits, p.digits}{integer Number of digits after the decimal point to
use for \eqn{R^2} and P-value in labels.}

\item{label.x, label.y}{\code{numeric} with range 0..1 "normalized parent
coordinates" (npc units) or character if using \code{geom_text_npc()} or
\code{geom_label_npc()}. If using \code{geom_text()} or \code{geom_label()}
numeric in native data units. If too short they will be recycled.}

\item{label.x.npc, label.y.npc}{\code{numeric} with range 0..1 (npc units)
DEPRECATED, use label.x and label.y instead; together with a geom
using npcx and npcy aesthetics.}

\item{hstep, vstep}{numeric in npc units, the horizontal and vertical step
used between labels for different groups.}

\item{output.type}{character One of "expression", "LaTeX", "text",
"markdown" or "numeric".}

\item{na.rm}{a logical indicating whether NA values should be stripped before
the computation proceeds.}

\item{orientation}{character Either "x" or "y" controlling the default for
\code{formula}.}

\item{parse}{logical Passed to the geom. If \code{TRUE}, the labels will be
parsed into expressions and displayed as described in \code{?plotmath}.
Default is \code{TRUE} if \code{output.type = "expression"} and
\code{FALSE} otherwise.}

\item{show.legend}{logical. Should this layer be included in the legends?
\code{NA} includes if any aesthetics are mapped. \code{FALSE}, the default,
never includes, and \code{TRUE} always includes.}

\item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, rather
than combining with them. This is most useful for helper functions that
define both data and aesthetics and shouldn't inherit behaviour from the
default plot specification, e.g. \code{\link[ggplot2]{borders}}.}
}
\description{
\code{stat_poly_eq} fits a polynomial by default with \code{stats::lm()} but
alternatively using robust regression. From the fitted model it
generates several labels including the equation, p-value, F-value,
coefficient of determination (\eqn{R^2}), 'AIC', 'BIC', and number of observations.
}
\details{
This statistic can be used to automatically annotate a plot with
  \eqn{R^2}, adjusted \eqn{R^2} or the fitted model equation. It supports
  linear regression, robust linear regression and median regression fitted
  with functions \code{\link{lm}}, \code{\link[MASS]{rlm}} or
  \code{\link[quantreg]{rq}}. The \eqn{R^2} and adjusted \eqn{R^2} annotations
  can be used with any linear model formula. The confidence interval for
  \eqn{R^2} is computed with function \code{\link[confintr]{ci_rsquared}}. The
  fitted equation label is correctly generated for polynomials or
  quasi-polynomials through the origin. Model formulas can use \code{poly()}
  or be defined algebraically with terms of powers of increasing magnitude
  with no missing intermediate terms, except possibly for the intercept
  indicated by \code{"- 1"} or \code{"-1"} or \code{"+ 0"} in the formula. The validity of
  the \code{formula} is not checked in the current implementation, and for
  this reason the default aesthetics sets \eqn{R^2} as label for the
  annotation. This statistic generates labels as R expressions by default but
  LaTeX (use TikZ device), markdown (use package 'ggtext') and plain text are
  also supported, as well as numeric values for user-generated text labels.
  The value of \code{parse} is set automatically based on \code{output.type},
  but if you assemble labels that need parsing from \code{numeric} output,
  the default needs to be overridden. This stat only generates annotation
  labels, the predicted values/line need to be added to the plot as a
  separate layer using \code{\link{stat_poly_line}} or
  \code{\link[ggplot2]{stat_smooth}}, so to make sure that the same model
  formula is used in all steps it is best to save the formula as an object
  and supply this object as argument to the different statistics.

  A ggplot statistic receives as \code{data} a data frame that is not the one
  passed as argument by the user, but instead a data frame with the variables
  mapped to aesthetics. \code{stat_poly_eq()} mimics how \code{stat_smooth()}
  works, except that only polynomials can be fitted. Similarly to these
  statistics the model fits respect grouping, so the scales used for \code{x}
  and \code{y} should both be continuous scales rather than discrete.
}
\note{
For backward compatibility a logical is accepted as argument for
  \code{eq.with.lhs}. If \code{TRUE}, the default is used, either
  \code{"x"} or \code{"y"}, depending on the argument passed to \code{formula}.
  However, \code{"x"} or \code{"y"} can be substituted by providing a
  suitable replacement character string through \code{eq.x.rhs}.
  Parameter \code{orientation} is redundant as it only affects the default
  for \code{formula} but is included for consistency with
  \code{ggplot2::stat_smooth()}.

  R option \code{OutDec} is obeyed based on its value at the time the plot
  is rendered, i.e., displayed or printed. Set \code{options(OutDec = ",")}
  for languages like Spanish or French.
}
\section{IMPORTANT}{
 \code{stat_regline_equation()} in package 'ggpubr' is
  a renamed but almost unchanged copy of \code{stat_poly_eq()} taken from an
  earlier version of this package (without acknowledgement of source and
  authorship). \code{stat_regline_equation()} lacks important functionality
  and contains bugs that have been fixed in \code{stat_poly_eq()}.
}

\section{Aesthetics}{
 \code{stat_poly_eq()} understands \code{x} and \code{y},
  to be referenced in the \code{formula} and \code{weight} passed as argument
  to parameter \code{weights}. All three must be mapped to
  \code{numeric} variables. In addition, the aesthetics understood by the geom
  (\code{"text_npc"} is the default) are understood and grouping respected.
}

\section{Computed variables}{

If \code{output.type} is different from \code{"numeric"} the returned tibble contains
columns listed below. If the model fit function used does not return a value,
the label is set to \code{character(0L)}.
\describe{
  \item{x,npcx}{x position}
  \item{y,npcy}{y position}
  \item{eq.label}{equation for the fitted polynomial as a character string to be parsed}
  \item{rr.label}{\eqn{R^2} of the fitted model as a character string to be parsed}
  \item{adj.rr.label}{Adjusted \eqn{R^2} of the fitted model as a character string to be parsed}
  \item{rr.confint.label}{Confidence interval for \eqn{R^2} of the fitted model as a character string to be parsed}
  \item{f.value.label}{F value and degrees of freedom for the fitted model as a whole.}
  \item{p.value.label}{P-value for the F-value above.}
  \item{AIC.label}{AIC for the fitted model.}
  \item{BIC.label}{BIC for the fitted model.}
  \item{n.label}{Number of observations used in the fit.}
  \item{grp.label}{Set according to mapping in \code{aes}.}
  \item{method.label}{Set according to the \code{method} used.}
  \item{r.squared, adj.r.squared, p.value, n}{numeric values, from the model fit object}}

If \code{output.type} is \code{"numeric"} the returned tibble contains columns
listed below. If the model fit function used does not return a value,
the variable is set to \code{NA_real_}.
\describe{
  \item{x,npcx}{x position}
  \item{y,npcy}{y position}
  \item{coef.ls}{list containing the "coefficients" matrix from the summary of the fit object}
  \item{r.squared, rr.confint.level, rr.confint.low, rr.confint.high, adj.r.squared, f.value, f.df1, f.df2, p.value, AIC, BIC, n}{numeric values, from the model fit object}
  \item{grp.label}{Set according to mapping in \code{aes}.}
  \item{b_0.constant}{TRUE if polynomial is forced through the origin}
  \item{b_i}{One or more columns with the coefficient estimates}}

To explore the computed values returned for a given input we suggest the use
of \code{\link[gginnards]{geom_debug}} as shown in the last examples below.
}

\examples{
# generate artificial data: a cubic polynomial in x plus normally distributed
# noise, rescaled so that the maximum of y is 1
set.seed(4321)
x <- 1:100
y <- (x + x^2 + x^3) + rnorm(length(x), mean = 0, sd = mean(x^3) / 4)
y <- y / max(y)
# group alternates between A and B (values are recycled), y2 rescales and
# shifts y following the same alternation, and w is used below as weights
my.data <- data.frame(x = x, y = y,
                      group = c("A", "B"),
                      y2 = y * c(1, 2) + c(0, 0.1),
                      w = sqrt(x))

# give a name to a formula: third degree polynomial with raw terms, so that
# the same formula can be passed to stat_poly_line() and stat_poly_eq() below
formula <- y ~ poly(x, 3, raw = TRUE)

# using defaults (no formula is passed to either statistic)
ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_poly_line() +
  stat_poly_eq()

# no weights, passing the same named formula to both statistics
ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_poly_line(formula = formula) +
  stat_poly_eq(formula = formula)

# other labels, selected by name with use_label(): eq only
ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_poly_line(formula = formula) +
  stat_poly_eq(use_label("eq"), formula = formula)

# labels requested: eq and R2
ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_poly_line(formula = formula) +
  stat_poly_eq(use_label(c("eq", "R2")), formula = formula)

# labels requested: R2, R2.CI, P and method
ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_poly_line(formula = formula) +
  stat_poly_eq(use_label(c("R2", "R2.CI", "P", "method")), formula = formula)

# labels requested: R2, F, P and n, passing a user-supplied sep string
ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_poly_line(formula = formula) +
  stat_poly_eq(use_label(c("R2", "F", "P", "n"), sep = "*\"; \"*"),
               formula = formula)

# grouping: color is mapped to group and the model fits respect grouping
ggplot(my.data, aes(x, y2, color = group)) +
  geom_point() +
  stat_poly_line(formula = formula) +
  stat_poly_eq(formula = formula)

# rotation: angle is set to a constant value
ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_poly_line(formula = formula) +
  stat_poly_eq(formula = formula, angle = 90)

# label location given as character strings
ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_poly_line(formula = formula) +
  stat_poly_eq(formula = formula, label.y = "bottom", label.x = "right")

# label location given as numeric values in npc units (range 0..1)
ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_poly_line(formula = formula) +
  stat_poly_eq(formula = formula, label.y = 0.1, label.x = 0.9)

# using weights: variable w is mapped to the weight aesthetic
ggplot(my.data, aes(x, y, weight = w)) +
  geom_point() +
  stat_poly_line(formula = formula) +
  stat_poly_eq(formula = formula)

# no weights, 4 digits for R square
ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_poly_line(formula = formula) +
  stat_poly_eq(formula = formula, rr.digits = 4)

# manually assemble and map a specific label using paste() and aes()
# here rr.label and n.label are combined into a single label
ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_poly_line(formula = formula) +
  stat_poly_eq(aes(label =  paste(after_stat(rr.label),
                                  after_stat(n.label), sep = "*\", \"*")),
               formula = formula)

# manually assemble and map a specific label using sprintf() and aes()
# here rr.label, f.value.label and p.value.label are combined
ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_poly_line(formula = formula) +
  stat_poly_eq(aes(label =  sprintf("\%s*\" with \"*\%s*\" and \"*\%s",
                                    after_stat(rr.label),
                                    after_stat(f.value.label),
                                    after_stat(p.value.label))),
               formula = formula)

# x on y regression: the line uses orientation = "y" while the equation
# is given a formula with x as response and y as explanatory variable
ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_poly_line(formula = formula, orientation = "y") +
  stat_poly_eq(use_label(c("eq", "adj.R2")),
               formula = x ~ poly(y, 3, raw = TRUE))

# conditional user specified label: eq.label is added to adj.rr.label only
# for groups in which adj.r.squared is larger than 0.96
ggplot(my.data, aes(x, y2, color = group)) +
  geom_point() +
  stat_poly_line(formula = formula) +
  stat_poly_eq(aes(label =  ifelse(after_stat(adj.r.squared) > 0.96,
                                   paste(after_stat(adj.rr.label),
                                         after_stat(eq.label),
                                         sep = "*\", \"*"),
                                   after_stat(adj.rr.label))),
               rr.digits = 3,
               formula = formula)

# geom = "text": label.x and label.y are given in native data units
ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_poly_line(formula = formula) +
  stat_poly_eq(geom = "text", label.x = 100, label.y = 0, hjust = 1,
               formula = formula)

# using numeric values
# Here we use columns b_0 ... b_3 for the coefficient estimates
# The label is assembled with sprintf() from numeric output, so the default
# for parse has to be overridden with parse = TRUE
my.format <-
  "b[0]~`=`~\%.3g*\", \"*b[1]~`=`~\%.3g*\", \"*b[2]~`=`~\%.3g*\", \"*b[3]~`=`~\%.3g"
ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_poly_line(formula = formula) +
  stat_poly_eq(formula = formula,
               output.type = "numeric",
               parse = TRUE,
               mapping =
                aes(label = sprintf(my.format,
                                    after_stat(b_0), after_stat(b_1),
                                    after_stat(b_2), after_stat(b_3))))

# Inspecting the returned data using geom_debug()
# This provides a quick way of finding out the names of the variables that
# are available for mapping to aesthetics with after_stat().
# These examples are run only if package gginnards is installed.

gginnards.installed <- requireNamespace("gginnards", quietly = TRUE)

if (gginnards.installed)
  library(gginnards)

# returned data with the default output.type
if (gginnards.installed)
  ggplot(my.data, aes(x, y)) +
    geom_point() +
    stat_poly_line(formula = formula) +
    stat_poly_eq(formula = formula, geom = "debug")

# returned data with numeric output
if (gginnards.installed)
  ggplot(my.data, aes(x, y)) +
    geom_point() +
    stat_poly_line(formula = formula) +
    stat_poly_eq(formula = formula, geom = "debug", output.type = "numeric")

# names of the variables in the returned data
if (gginnards.installed)
  ggplot(my.data, aes(x, y)) +
    geom_point() +
    stat_poly_line(formula = formula) +
    stat_poly_eq(formula = formula, geom = "debug",
                 summary.fun = colnames)

# only data$eq.label, with output.type set to expression
if (gginnards.installed)
  ggplot(my.data, aes(x, y)) +
    geom_point() +
    stat_poly_line(formula = formula) +
    stat_poly_eq(formula = formula, geom = "debug",
                 output.type = "expression",
                 summary.fun = function(x) {x[["eq.label"]]})

# only data$eq.label, with output.type set to markdown
if (gginnards.installed)
  ggplot(my.data, aes(x, y)) +
    geom_point() +
    stat_poly_line(formula = formula) +
    stat_poly_eq(aes(label = after_stat(eq.label)),
                 formula = formula, geom = "debug",
                 output.type = "markdown",
                 summary.fun = function(x) {x[["eq.label"]]})

# only data$eq.label, with output.type set to latex
if (gginnards.installed)
  ggplot(my.data, aes(x, y)) +
    geom_point() +
    stat_poly_line(formula = formula) +
    stat_poly_eq(formula = formula, geom = "debug",
                 output.type = "latex",
                 summary.fun = function(x) {x[["eq.label"]]})

# only data$eq.label, with output.type set to text
if (gginnards.installed)
  ggplot(my.data, aes(x, y)) +
    geom_point() +
    stat_poly_line(formula = formula) +
    stat_poly_eq(formula = formula, geom = "debug",
                 output.type = "text",
                 summary.fun = function(x) {x[["eq.label"]]})

# show the content of a list column: first element of coef.ls
if (gginnards.installed)
  ggplot(my.data, aes(x, y)) +
    geom_point() +
    stat_poly_line(formula = formula) +
    stat_poly_eq(formula = formula, geom = "debug", output.type = "numeric",
                 summary.fun = function(x) {x[["coef.ls"]][[1]]})

}
\references{
Written as an answer to question 7549694 at Stackoverflow.
}
\seealso{
This \code{stat_poly_eq} statistic can return ready formatted labels
  depending on the argument passed to \code{output.type}. This is possible
  because only polynomial models are supported. For quantile regression
  \code{\link{stat_quant_eq}} should be used instead of \code{stat_poly_eq}
  while for model II or major axis regression \code{\link{stat_ma_eq}} should
  be used. For other types of models such as non-linear models, statistics
  \code{\link{stat_fit_glance}} and \code{\link{stat_fit_tidy}} should be
  used and the code for construction of character strings from
  numeric values and their mapping to aesthetic \code{label} needs to be
  explicitly supplied by the user.

Other ggplot statistics for linear and polynomial regression: 
\code{\link{stat_poly_line}()}
}
\concept{ggplot statistics for linear and polynomial regression}
