% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/stability.R
\name{stabilityGLM}
\alias{stabilityGLM}
\title{Computes the area under the stability path for all covariates}
\usage{
stabilityGLM(X, Y, weights = rep(1, nrow(X)), family = "gaussian",
  n_subsample = 20, n_lambda = 100, short = TRUE,
  lambda_min_ratio = 0.01, eps = 1e-05)
}
\arguments{
\item{X}{input design matrix}

\item{Y}{response vector}

\item{weights}{nonnegative sample weights}

\item{family}{response type. Either 'gaussian' or 'binomial'}

\item{n_subsample}{number of subsamples for stability selection}

\item{n_lambda}{total number of lambda values}

\item{short}{whether to compute the AUCs only on the first half
of the stability path. We observed better performance for
thresholded paths.}

\item{lambda_min_ratio}{ratio of \eqn{\lambda_{min}}{lambda_min} to
\eqn{\lambda_{max}}{lambda_max} (see description for a thorough explanation)}

\item{eps}{elastic net mixing parameter: the L2 penalty is weighted by
\code{eps} and the L1 penalty by \code{1 - eps} (see Details).}
}
\value{
a vector containing the areas under the stability path
curves
}
\description{
To perform model selection, this function scores all covariates in \code{X}
according to the area under their stability selection paths. Our model selection
procedure starts by dynamically defining a grid for
the elastic net penalization parameter \eqn{\lambda}{lambda}. To define the
grid, we solve the full-dataset elastic net. This yields
\code{n_lambda} log-scaled values between \eqn{\lambda_{max}}{lambda_max}
and \eqn{\lambda_{min}}{lambda_min}. \eqn{\lambda_{max}}{lambda_max} is
the maximum value for which the elastic net support is not empty. On the other hand,
\eqn{\lambda_{min}}{lambda_min} can be derived through
\code{lambda_min_ratio}, which is the ratio of \eqn{\lambda_{min}}{lambda_min}
to \eqn{\lambda_{max}}{lambda_max}. The next step is identical to the original
stability selection procedure. For each value of \eqn{\lambda}{lambda}, we
solve the same elastic net problem \code{n_subsample} times, each time on a different subsample.
The subsample is a random selection of half of the samples of the original dataset.
The empirical frequency of each covariate entering the support is then the number of
times the covariate is selected in the support as a fraction of \code{n_subsample}.
We obtain the stability path by associating each value of \eqn{\lambda}{lambda}
with the corresponding empirical frequency. The final scores are the areas under the
stability path curves. This is a key difference from the original stability
selection procedure, where the final score is the maximum empirical frequency.
In simulations, our scoring technique outperformed maximum empirical
frequencies.
}
\details{
For a fixed \eqn{\lambda}{lambda},
the L2 penalization is \eqn{\lambda \times eps}{lambda * eps}, while
the L1 penalization is \eqn{\lambda \times (1-eps)}{lambda * (1-eps)}.
The goal of the L2 penalization is to ensure the uniqueness of the
solution. For that reason, we recommend setting \code{eps} \eqn{\ll 1}{<< 1}.
}
\examples{
# ---- Continuous data ----
# Simulate n samples of p independent standard normal covariates.
n <- 50
p <- 20
X <- matrix(rnorm(n * p), ncol = p)
# Noise-free linear response: crossprod(t(X), b) equals the matrix product
# of X and b, with b a random Gaussian coefficient vector.
Y <- crossprod(t(X), rnorm(p))
# One subsample only; short = FALSE scores the whole stability path
# instead of its first half.
aucs_cont <- stabilityGLM(X, Y, family = "gaussian", n_subsample = 1,
                          short = FALSE)

# ---- Binary data ----
X <- matrix(rnorm(n * p), ncol = p)
# Bernoulli response: Y is TRUE with probability given by the logistic function
# of a random linear combination of covariates 1, 7 and 15.
Y <- runif(n, min = 0, max = 1) < 1/ (1 + exp(-X[, c(1, 7, 15)] \%*\% rnorm(3)))
# Sample weights drawn uniformly between 0.4 and 0.8.
weights <- runif(n, min = 0.4, max = 0.8)
# NOTE(review): family is not passed here, so the default "gaussian" from the
# usage section applies even though Y is binary; confirm whether
# family = "binomial" was intended.
aucs_binary <- stabilityGLM(X, Y, weights = weights,
                            n_lambda = 50, lambda_min_ratio = 0.05, n_subsample = 1)


}
\references{
Slim, L., Chatelain, C., Azencott, C.-A., & Vert, J.-P.
(2018). Novel Methods for Epistasis Detection in Genome-Wide Association
Studies. bioRxiv.

Meinshausen, N., & Bühlmann, P. (2010). Stability
selection. Journal of the Royal Statistical Society: Series B
(Statistical Methodology), 72(4), 417–473.

Haury, A. C., Mordelet, F., Vera-Licona, P., & Vert, J. P.
(2012). TIGRESS: Trustful Inference of Gene REgulation using Stability
Selection. BMC Systems Biology, 6.
}
\seealso{
\code{\link[glmnet]{glmnet-package}}

Other support estimation functions: \code{\link{stabilityBIG}}
}
\concept{support estimation functions}
