% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/goldfeld_quandt.R
\name{goldfeld_quandt}
\alias{goldfeld_quandt}
\title{Goldfeld-Quandt Tests for Heteroskedasticity in a Linear Regression Model}
\usage{
goldfeld_quandt(
  mainlm,
  method = c("parametric", "nonparametric"),
  deflator = NA,
  prop_central = 1/3,
  group1prop = 1/2,
  alternative = c("greater", "less", "two.sided"),
  prob = NA,
  twosidedmethod = c("doubled", "kulinskaya"),
  restype = c("ols", "blus"),
  statonly = FALSE,
  ...
)
}
\arguments{
\item{mainlm}{Either an object of \code{\link[base]{class}} \code{"lm"}
(e.g., generated by \code{\link[stats]{lm}}), or
a list of two objects: a response vector and a design matrix. The objects
are assumed to be in that order, unless they are given the names
\code{"X"} and \code{"y"} to distinguish them. The design matrix passed
in a list must begin with a column of ones if an intercept is to be
included in the linear model. The design matrix passed in a list should
not contain factors, as all columns are treated 'as is'. For tests that
use ordinary least squares residuals, one can also pass a vector of
residuals in the list, which should either be the third object or be
named \code{"e"}.}

\item{method}{A character indicating which of the two tests derived in
\insertCite{Goldfeld65;textual}{skedastic} should be implemented.
Possible values are \code{"parametric"} and \code{"nonparametric"}. Default is
\code{"parametric"}. It is acceptable to specify only the first letter.}

\item{deflator}{Either a character specifying a column name from the
design matrix of \code{mainlm} or an integer giving the index of a
column of the design matrix. This variable is suspected to be
related to the error variance under the alternative hypothesis.
\code{deflator} may not correspond to a column of 1's (intercept).
Default \code{NA} means the data will be left in its current order
(e.g. in case the existing index is believed to be associated with
error variance).}

\item{prop_central}{A double specifying the proportion of central
observations to exclude from the F test (when \code{method} is
\code{"parametric"} only). \code{\link[base]{round}} is
used to ensure the number of central observations is an integer. The
value must be small enough to allow the two auxiliary regressions to
be fit; otherwise an error is thrown. Defaults to \code{1 / 3}.}

\item{group1prop}{A double specifying the proportion of remaining
observations \emph{(after excluding central observations)} to allocate
to the first group. The default value of \code{1 / 2} means that an
equal number of observations is assigned to the first and second groups.}

\item{alternative}{A character specifying the form of alternative
hypothesis. Use \code{"greater"} if it is suspected that the
error variance is positively associated with the deflator variable,
\code{"less"} if it is suspected that the error variance is
negatively associated with the deflator variable, and
\code{"two.sided"} if no information is available on the suspected
direction of the association. Defaults to \code{"greater"}.}

\item{prob}{A vector of probabilities corresponding to values of the test
statistic (number of peaks) from 0 to \eqn{n-1} inclusive (used
only when \code{method} is \code{"nonparametric"}). If
\code{NA} (the default), probabilities are calculated within the
function by calling \code{ppeak}. The user can improve computational
performance of the test (for instance, when the test is being used
repeatedly in a simulation) by pre-specifying the exact probability
distribution of the number of peaks using this argument, e.g. by
extracting the \eqn{n}th element of \code{\link{dpeakdat}} (or \eqn{(n-p)}th
element, if BLUS residuals are used).}

\item{twosidedmethod}{A character indicating the method to be used to compute
two-sided \eqn{p}-values for the parametric test when \code{alternative}
is \code{"two.sided"}. The argument is passed to
\code{\link{twosidedpval}} as its \code{method} argument.}

\item{restype}{A character specifying which residuals to use: \code{"ols"}
for OLS residuals (the default) or \code{"blus"} for
\link[=blus]{BLUS} residuals. The advantage of using BLUS residuals is
that, under the null hypothesis, the assumption that the random series
is independent and identically distributed is met (whereas with OLS
residuals it is not). The disadvantage of using BLUS residuals is that
only \eqn{n-p} residuals are used rather than the full \eqn{n}. This
argument is ignored if \code{method} is \code{"parametric"}.}

\item{statonly}{A logical. If \code{TRUE}, only the test statistic value
is returned, instead of an object of \code{\link[base]{class}}
\code{"htest"}. Defaults to \code{FALSE}.}

\item{...}{Optional further arguments to pass to \code{\link{blus}}.}
}
\value{
An object of \code{\link[base]{class}} \code{"htest"}. If the object is
   not assigned, its attributes are displayed in the console as a
   \code{\link[tibble]{tibble}} using \code{\link[broom]{tidy}}.
}
\description{
This function implements the two methods (parametric and nonparametric) of
   \insertCite{Goldfeld65;textual}{skedastic} for testing for heteroskedasticity
   in a linear regression model.
}
\details{
The parametric test entails putting the data rows in increasing order of
   some specified deflator (one of the explanatory variables). A specified
   proportion of the most central observations (under this ordering) is
   removed, leaving a subset of lower observations and a subset of upper
   observations. Separate OLS regressions are fit to these two subsets of
   observations (using all variables from the original model). The test
   statistic is the ratio of the sum of squared residuals from the
   'upper' model to the sum of squared residuals from the 'lower' model.
   Under the null hypothesis, and when the two subsets are of equal size,
   the test statistic is exactly F-distributed with numerator and
   denominator degrees of freedom both equal to
   \eqn{(n-c)/2 - p}, where \eqn{n} is the number of observations in the
   original regression model, \eqn{c} is the number of central observations
   removed, and \eqn{p} is the number of columns in the design matrix (number of
   parameters to be estimated, including intercept).

The nonparametric test entails putting the residuals of the linear model in
   increasing order of some specified deflator (one of the explanatory
   variables). The test statistic is the number of peaks, with the \eqn{j}th
   absolute residual \eqn{|e_j|} defined as a peak if \eqn{|e_j|\ge|e_i|}{|e_j| >= |e_i|}
   for all \eqn{i<j}. The first observation does not constitute a peak. If
   the number of peaks is large relative to the distribution of peaks under
   the null hypothesis, this constitutes evidence for heteroskedasticity.
}
\examples{
mtcars_lm <- lm(mpg ~ wt + qsec + am, data = mtcars)
goldfeld_quandt(mtcars_lm, deflator = "qsec", prop_central = 0.25)
# This is equivalent to lmtest::gqtest(mtcars_lm, fraction = 0.25, order.by = mtcars$qsec)
goldfeld_quandt(mtcars_lm, deflator = "qsec", method = "nonparametric",
 restype = "blus")
goldfeld_quandt(mtcars_lm, deflator = "qsec", prop_central = 0.25, alternative = "two.sided")
goldfeld_quandt(mtcars_lm, deflator = "qsec", method = "nonparametric",
 restype = "blus", alternative = "two.sided")
}
\references{
{\insertAllCited{}}
}
\seealso{
\code{\link[lmtest:gqtest]{lmtest::gqtest}}, another implementation
   of the Goldfeld-Quandt Test (parametric method only).
}
