% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/graphical_model.R
\name{GraphicalModel}
\alias{GraphicalModel}
\title{Stability selection graphical model}
\usage{
GraphicalModel(
  xdata,
  pk = NULL,
  Lambda = NULL,
  lambda_other_blocks = 0.1,
  pi_list = seq(0.6, 0.9, by = 0.01),
  K = 100,
  tau = 0.5,
  seed = 1,
  n_cat = 3,
  implementation = PenalisedGraphical,
  start = "warm",
  scale = TRUE,
  resampling = "subsampling",
  cpss = FALSE,
  PFER_method = "MB",
  PFER_thr = Inf,
  FDP_thr = Inf,
  Lambda_cardinal = 50,
  lambda_max = NULL,
  lambda_path_factor = 0.001,
  max_density = 0.5,
  n_cores = 1,
  output_data = FALSE,
  verbose = TRUE,
  beep = NULL,
  ...
)
}
\arguments{
\item{xdata}{data matrix with observations as rows and variables as columns.
For multi-block stability selection, the variables in data have to be
ordered by group.}

\item{pk}{optional vector encoding the grouping structure. Only used for
multi-block stability selection where \code{pk} indicates the number of
variables in each group. If \code{pk=NULL}, single-block stability
selection is performed.}

\item{Lambda}{matrix of parameters controlling the level of sparsity in the
underlying feature selection algorithm specified in \code{implementation}.
If \code{Lambda=NULL} and \code{implementation=PenalisedGraphical},
\code{\link{LambdaGridGraphical}} is used to define a relevant grid.
\code{Lambda} can be provided as a vector or a matrix with one column per
block (e.g. three columns for two groups of variables, see examples).}

\item{lambda_other_blocks}{optional vector of parameters controlling the
level of sparsity in neighbour blocks for the multi-block procedure. To use
jointly a specific set of parameters for each block,
\code{lambda_other_blocks} must be set to \code{NULL} (not recommended).
Only used for multi-block stability selection, i.e. if \code{length(pk)>1}.}

\item{pi_list}{vector of thresholds in selection proportions. If
\code{n_cat=3}, these values must be \code{>0.5} and \code{<1}. If
\code{n_cat=2}, these values must be \code{>0} and \code{<1}.}

\item{K}{number of resampling iterations.}

\item{tau}{subsample size. Only used if \code{resampling="subsampling"} and
\code{cpss=FALSE}.}

\item{seed}{value of the seed to initialise the random number generator and
ensure reproducibility of the results (see \code{\link[base]{set.seed}}).}

\item{n_cat}{number of categories used to compute the stability score.
Possible values are 2 or 3.}

\item{implementation}{function to use for graphical modelling. If
\code{implementation=PenalisedGraphical}, the algorithm implemented in
\code{\link[glassoFast]{glassoFast}} is used for regularised estimation of
a conditional independence graph. Alternatively, a user-defined function
can be provided.}

\item{start}{character string indicating if the algorithm should be
initialised at the estimated (inverse) covariance with previous penalty
parameters (\code{start="warm"}) or not (\code{start="cold"}). Using
\code{start="warm"} can speed up the computations, but could lead to
convergence issues (in particular with small \code{Lambda_cardinal}). Only
used for \code{implementation=PenalisedGraphical} (see argument
\code{"start"} in \code{\link[glassoFast]{glassoFast}}).}

\item{scale}{logical indicating if the correlation (\code{scale=TRUE}) or
covariance (\code{scale=FALSE}) matrix should be used as input of
\code{\link[glassoFast]{glassoFast}} if
\code{implementation=PenalisedGraphical}. Otherwise, this argument must be
used in the function provided in \code{implementation}.}

\item{resampling}{resampling approach. Possible values are:
\code{"subsampling"} for sampling without replacement of a proportion
\code{tau} of the observations, or \code{"bootstrap"} for sampling with
replacement generating a resampled dataset with as many observations as in
the full sample. Alternatively, this argument can be a function to use for
resampling. This function must use arguments named \code{data} and
\code{tau} and return the IDs of observations to be included in the
resampled dataset.}

\item{cpss}{logical indicating if complementary pair stability selection
should be done. For this, the algorithm is applied on two non-overlapping
subsets of half of the observations. A feature is considered as selected if
it is selected for both subsamples. With this method, the data is split
\code{K/2} times (\code{K} models are fitted). Only used if
\code{PFER_method="MB"}.}

\item{PFER_method}{method used to compute the upper-bound of the expected
number of False Positives (or Per Family Error Rate, PFER). If
\code{PFER_method="MB"}, the method proposed by Meinshausen and Bühlmann
(2010) is used. If \code{PFER_method="SS"}, the method proposed by Shah and
Samworth (2013) under the assumption of unimodality is used.}

\item{PFER_thr}{threshold in PFER for constrained calibration by error
control. If \code{PFER_thr=Inf} and \code{FDP_thr=Inf}, unconstrained
calibration is used (the default).}

\item{FDP_thr}{threshold in the expected proportion of falsely selected
features (or False Discovery Proportion) for constrained calibration by
error control. If \code{PFER_thr=Inf} and \code{FDP_thr=Inf}, unconstrained
calibration is used (the default).}

\item{Lambda_cardinal}{number of values in the grid of parameters controlling
the level of sparsity in the underlying algorithm. Only used if
\code{Lambda=NULL}.}

\item{lambda_max}{optional maximum value for the grid in penalty parameters.
If \code{lambda_max=NULL}, the maximum value is set to the maximum
covariance in absolute value. Only used if
\code{implementation=PenalisedGraphical} and \code{Lambda=NULL}.}

\item{lambda_path_factor}{multiplicative factor used to define the minimum
value in the grid.}

\item{max_density}{threshold on the density. The grid is defined such that
the density of the estimated graph does not exceed \code{max_density}.}

\item{n_cores}{number of cores to use for parallel computing (see
\code{\link[parallel]{mclapply}}). Only available on Unix systems.}

\item{output_data}{logical indicating if the input dataset \code{xdata}
should be included in the output.}

\item{verbose}{logical indicating if a loading bar and messages should be
printed.}

\item{beep}{sound indicating the end of the run. Possible values are:
\code{NULL} (no sound) or an integer between 1 and 11 (see argument
\code{sound} in \code{\link[beepr]{beep}}).}

\item{...}{additional parameters passed to the functions provided in
\code{implementation} or \code{resampling}.}
}
\value{
An object of class \code{graphical_model}. A list with: \item{S}{a
  matrix of the best stability scores for different (sets of) parameters
  controlling the level of sparsity in the underlying algorithm.}
  \item{Lambda}{a matrix of parameters controlling the level of sparsity in
  the underlying algorithm.} \item{Q}{a matrix of the average number of
  selected features by the underlying algorithm with different parameters
  controlling the level of sparsity.} \item{Q_s}{a matrix of the calibrated
  number of stably selected features with different parameters controlling
  the level of sparsity.} \item{P}{a matrix of calibrated thresholds in
  selection proportions for different parameters controlling the level of
  sparsity in the underlying algorithm.} \item{PFER}{a matrix of upper-bounds
  in PFER of calibrated stability selection models with different parameters
  controlling the level of sparsity.} \item{FDP}{a matrix of upper-bounds in
  FDP of calibrated stability selection models with different parameters
  controlling the level of sparsity.} \item{S_2d}{a matrix of stability
  scores obtained with different combinations of parameters. Columns
  correspond to different thresholds in selection proportions.}
  \item{PFER_2d}{a matrix of upper-bounds in PFER obtained with different
  combinations of parameters. Columns correspond to different thresholds in
  selection proportions. Only returned if \code{length(pk)=1}.}
  \item{FDP_2d}{a matrix of upper-bounds in FDP obtained with different
  combinations of parameters. Columns correspond to different thresholds in
  selection proportions. Only returned if \code{length(pk)=1}.}
  \item{selprop}{an array of selection proportions. Rows and columns
  correspond to nodes in the graph. Indices along the third dimension
  correspond to different parameters controlling the level of sparsity in the
  underlying algorithm.} \item{sign}{a matrix of signs of Pearson's
  correlations estimated from \code{xdata}.} \item{method}{a list with
  \code{type="graphical_model"} and values used for arguments
  \code{implementation}, \code{start}, \code{resampling}, \code{cpss} and
  \code{PFER_method}.} \item{params}{a list with values used for arguments
  \code{K}, \code{pi_list}, \code{tau}, \code{n_cat}, \code{pk}, \code{n}
  (number of observations in \code{xdata}), \code{PFER_thr}, \code{FDP_thr},
  \code{seed}, \code{lambda_other_blocks}, and \code{Sequential_template}.}
  The rows of \code{S}, \code{Lambda}, \code{Q}, \code{Q_s}, \code{P},
  \code{PFER}, \code{FDP}, \code{S_2d}, \code{PFER_2d} and \code{FDP_2d}, and
  indices along the third dimension of \code{selprop} are ordered in the same
  way and correspond to parameter values stored in \code{Lambda}. For
  multi-block inference, the columns of \code{S}, \code{Lambda}, \code{Q},
  \code{Q_s}, \code{P}, \code{PFER} and \code{FDP}, and indices along the
  third dimension of \code{S_2d} correspond to the different blocks.
}
\description{
Performs stability selection for graphical models. The underlying graphical
model (e.g. graphical LASSO) is run with different combinations of parameters
controlling the sparsity (e.g. penalty parameter) and thresholds in selection
proportions. These two hyper-parameters are jointly calibrated by
maximisation of the stability score.
}
\details{
In stability selection, a feature selection algorithm is fitted on
  \code{K} subsamples (or bootstrap samples) of the data with different
  parameters controlling the sparsity (\code{Lambda}). For a given (set of)
  sparsity parameter(s), the proportion out of the \code{K} models in which
  each feature is selected is calculated. Features with selection proportions
  above a threshold pi are considered stably selected. The stability
  selection model is controlled by the sparsity parameter(s) for the
  underlying algorithm, and the threshold in selection proportion:

  \eqn{V_{\lambda, \pi} = \{ j: p_{\lambda}(j) \ge \pi \} }

  These parameters can be calibrated by maximisation of a stability score
  (see \code{\link{StabilityScore}}) derived from the likelihood under the
  assumption of uniform (uninformative) selection:

  \eqn{S_{\lambda, \pi} = -log(L_{\lambda, \pi})}

  It is strongly recommended to examine the calibration plot carefully to
  check that the grids of parameters \code{Lambda} and \code{pi_list} do not
  restrict the calibration to a region that would not include the global
  maximum (see \code{\link{CalibrationPlot}}). In particular, the grid
  \code{Lambda} may need to be extended when the maximum stability is
  observed on the left or right edges of the calibration heatmap.

  To control the expected number of False Positives (Per Family Error Rate)
  in the results, a threshold \code{PFER_thr} can be specified. The
  optimisation problem is then constrained to sets of parameters that
  generate models with an upper-bound in PFER below \code{PFER_thr} (see
  Meinshausen and Bühlmann (2010) and Shah and Samworth (2013)).

  Possible resampling procedures include defining (i) \code{K} subsamples of
  a proportion \code{tau} of the observations, (ii) \code{K} bootstrap samples
  with the full sample size (obtained with replacement), and (iii) \code{K/2}
  splits of the data in half for complementary pair stability selection (see
  arguments \code{resampling} and \code{cpss}). In complementary pair
  stability selection, a feature is considered selected at a given resampling
  iteration if it is selected in the two complementary subsamples.

  To ensure reproducibility of the results, the starting number of the random
  number generator is set to \code{seed}.

  For parallelisation, stability selection with different sets of parameters
  can be run on \code{n_cores} cores. This relies on forking with
  \code{\link[parallel]{mclapply}} (specific to Unix systems). Alternatively,
  the function can be run manually with different \code{seed}s and all other
  parameters equal. The results can then be combined using
  \code{\link{Combine}}.

  The generated network can be converted into an
  \code{\link[igraph:igraph-package]{igraph}} object using
  \code{\link{Graph}}. The R package
  \code{\link[visNetwork:visDocumentation]{visNetwork}} can be used for
  interactive network visualisation (see examples in \code{\link{Graph}}).
}
\examples{
\donttest{
# Saving the current graphical parameters so that they can be restored with
# par(oldpar) at the end of the examples, then widening all four margins
oldpar <- par(no.readonly = TRUE)
par(mar = rep(7, 4))

## Single-block stability selection

# Data simulation
set.seed(1)
simul <- SimulateGraphical(n = 100, pk = 20, nu_within = 0.1)

# Stability selection
stab <- GraphicalModel(xdata = simul$data)
print(stab)

# Calibration heatmap
CalibrationPlot(stab)

# Visualisation of the results
summary(stab)
plot(stab)

# Extraction of adjacency matrix or igraph object
Adjacency(stab)
Graph(stab)


## Multi-block stability selection

# Data simulation
set.seed(1)
simul <- SimulateGraphical(pk = c(10, 10))

# Stability selection
stab <- GraphicalModel(xdata = simul$data, pk = c(10, 10), Lambda_cardinal = 10)
print(stab)

# Calibration heatmap
# par(mfrow = c(1, 3))
CalibrationPlot(stab) # Producing three plots

# Visualisation of the results
summary(stab)
plot(stab)

# Multi-parameter stability selection (not recommended)
# Lambda is provided as a matrix with three columns and lambda_other_blocks
# is set to NULL so that a specific set of parameters is used jointly for
# each block
Lambda <- matrix(c(0.8, 0.6, 0.3, 0.5, 0.4, 0.3, 0.7, 0.5, 0.1), ncol = 3)
stab <- GraphicalModel(
  xdata = simul$data, pk = c(10, 10),
  Lambda = Lambda, lambda_other_blocks = NULL
)
stab$Lambda


## Example with user-defined function: shrinkage estimation and selection

# Data simulation
set.seed(1)
simul <- SimulateGraphical(n = 100, pk = 20, nu_within = 0.1)

if (requireNamespace("corpcor", quietly = TRUE)) {
  # Writing user-defined algorithm in a portable function
  # User-defined implementation: the matrix returned by corpcor::pcor.shrink
  # is thresholded in absolute value at each value in the first column of
  # Lambda. Returns a list with one element, adjacency, an array storing one
  # binary matrix with zero diagonal per row of Lambda along its third
  # dimension.
  ShrinkageSelection <- function(xdata, Lambda, ...) {
    mypcor <- corpcor::pcor.shrink(xdata, verbose = FALSE)
    adjacency <- array(NA, dim = c(nrow(mypcor), ncol(mypcor), nrow(Lambda)))
    # seq_len is used so that the loop is skipped if Lambda has no rows
    for (k in seq_len(nrow(Lambda))) {
      A <- ifelse(abs(mypcor) >= Lambda[k, 1], yes = 1, no = 0)
      diag(A) <- 0
      adjacency[, , k] <- A
    }
    return(list(adjacency = adjacency))
  }

  # Running the algorithm without stability
  myglasso <- GraphicalAlgo(
    xdata = simul$data,
    Lambda = matrix(c(0.05, 0.1), ncol = 1), implementation = ShrinkageSelection
  )

  # Stability selection using shrinkage estimation and selection
  stab <- GraphicalModel(
    xdata = simul$data, Lambda = matrix(c(0.01, 0.05, 0.1), ncol = 1),
    implementation = ShrinkageSelection
  )
  stable_adjacency <- Adjacency(stab)
}


## Example for the detection of block structure

# Data simulation
set.seed(1)
pk <- sample(1:5, size = 5, replace = TRUE)
simul <- SimulateComponents(
  n = 100, pk = pk,
  v_within = c(0.7, 0.8), v_sign = -1
)

# Data visualisation
Heatmap(
  mat = cor(simul$data),
  col = c("navy", "white", "red"),
  legend_range = c(-1, 1)
)

par(oldpar)
}
}
\references{
\insertRef{ourstabilityselection}{sharp}

  \insertRef{stabilityselectionSS}{sharp}

  \insertRef{stabilityselectionMB}{sharp}

  \insertRef{GraphicalLasso}{sharp}
}
\seealso{
\code{\link{PenalisedGraphical}}, \code{\link{GraphicalAlgo}},
  \code{\link{LambdaGridGraphical}}, \code{\link{Resample}},
  \code{\link{StabilityScore}}, \code{\link{Graph}}, \code{\link{Adjacency}}

Other stability functions: 
\code{\link{BiSelection}()},
\code{\link{Clustering}()},
\code{\link{VariableSelection}()}
}
\concept{stability functions}
