% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/quadratic_forms_of_survey_design_objects.R
\name{get_design_quad_form}
\alias{get_design_quad_form}
\title{Determine the quadratic form matrix of a variance estimator for a survey design object}
\usage{
get_design_quad_form(design, variance_estimator, ensure_psd = FALSE)
}
\arguments{
\item{design}{A survey design object created using the 'survey' (or 'srvyr') package,
with class \code{'survey.design'} or \code{'svyimputationList'}. Also accepts two-phase design objects
with class \code{'twophase2'}; see the section below titled "Two-Phase Designs" for
more information about handling of two-phase designs.}

\item{variance_estimator}{The name of the variance estimator
whose quadratic form matrix should be created. \cr
See the section "Variance Estimators" below.
Options include:
\itemize{
  \item{\strong{"Yates-Grundy"}: }{The Yates-Grundy variance estimator based on
  first-order and second-order inclusion probabilities.}
  \item{\strong{"Horvitz-Thompson"}: }{The Horvitz-Thompson variance estimator based on
  first-order and second-order inclusion probabilities.}
  \item{\strong{"Poisson Horvitz-Thompson"}: }{The Horvitz-Thompson variance estimator
  based on assuming Poisson sampling with specified first-order inclusion probabilities.}
  \item{\strong{"Stratified Multistage SRS"}: }{The usual stratified multistage variance estimator
  based on estimating the variance of cluster totals within strata at each stage.}
  \item{\strong{"Ultimate Cluster"}: }{The usual variance estimator based on estimating
  the variance of first-stage cluster totals within first-stage strata.}
  \item{\strong{"Deville-1"}: }{A variance estimator for unequal-probability
  sampling without replacement, described in Matei and Tillé (2005)
  as "Deville 1".}
  \item{\strong{"Deville-2"}: }{A variance estimator for unequal-probability
  sampling without replacement, described in Matei and Tillé (2005)
  as "Deville 2".}
  \item{\strong{"SD1"}: }{The non-circular successive-differences variance estimator described by Ash (2014),
  sometimes used for variance estimation for systematic sampling.}
  \item{\strong{"SD2"}: }{The circular successive-differences variance estimator described by Ash (2014).
  This estimator is the basis of the "successive-differences replication" estimator commonly used
  for variance estimation for systematic sampling.}
}}

\item{ensure_psd}{If \code{TRUE} (the default is \code{FALSE}), ensures
that the result is a positive semidefinite matrix. This
is necessary if the quadratic form is used as an input for
replication methods such as the generalized bootstrap.
For mathematical details, please see the documentation for the function \code{get_nearest_psd_matrix()}.
The approximation method is discussed by Beaumont and Patak (2012)
in the context of forming replicate weights for two-phase samples.
The authors argue that this approximation should
lead to only a small overestimation of variance.}
}
\value{
A matrix representing the quadratic form of a specified variance estimator,
based on extracting information about clustering, stratification,
and selection probabilities from the survey design object.
}
\description{
Determines the quadratic form matrix of a specified variance estimator,
by parsing the information stored in a survey design object created using
the 'survey' package.
}
\section{Variance Estimators}{

The \strong{Horvitz-Thompson} variance estimator:
\deqn{
  v(\hat{Y}) = \sum_{i \in s}\sum_{j \in s} (1 - \frac{\pi_i \pi_j}{\pi_{ij}}) \frac{y_i}{\pi_i} \frac{y_j}{\pi_j}
}
The \strong{Yates-Grundy} variance estimator:
\deqn{
  v(\hat{Y}) = -\frac{1}{2}\sum_{i \in s}\sum_{j \in s} (1 - \frac{\pi_i \pi_j}{\pi_{ij}}) (\frac{y_i}{\pi_i} - \frac{y_j}{\pi_j})^2
}
The \strong{Poisson Horvitz-Thompson} variance estimator
is simply the Horvitz-Thompson variance estimator, but
where \eqn{\pi_{ij}=\pi_i \times \pi_j} for \eqn{i \neq j}, which is the case for Poisson sampling.
\cr \cr
The \strong{Stratified Multistage SRS} variance estimator is the recursive variance estimator
proposed by Bellhouse (1985) and used in the 'survey' package's function \link[survey]{svyrecvar}.
The estimator can be used for any number of sampling stages. For illustration, we describe its use
for two sampling stages.
\deqn{
  v(\hat{Y}) = \hat{V}_1 + \hat{V}_2
}
where
\deqn{
  \hat{V}_1 = \sum_{h=1}^{H} (1 - \frac{n_h}{N_h})\frac{n_h}{n_h - 1} \sum_{i=1}^{n_h} (y_{hi.} - \bar{y}_{h..})^2
}
and
\deqn{
  \hat{V}_2 = \sum_{h=1}^{H} \frac{n_h}{N_h} \sum_{i=1}^{n_h}v_{hi}(y_{hi.})
}
where \eqn{n_h} is the number of sampled clusters in stratum \eqn{h},
\eqn{N_h} is the number of population clusters in stratum \eqn{h},
\eqn{y_{hi.}} is the weighted cluster total in cluster \eqn{i} of stratum \eqn{h},
\eqn{\bar{y}_{h..}} is the mean weighted cluster total of stratum \eqn{h}
(\eqn{\bar{y}_{h..} = \frac{1}{n_h}\sum_{i=1}^{n_h}y_{hi.}}), and
\eqn{v_{hi}(y_{hi.})} is the estimated sampling variance of \eqn{y_{hi.}}.
\cr \cr
The \strong{Ultimate Cluster} variance estimator is simply the stratified multistage SRS
variance estimator, but ignoring variances from later stages of sampling.
\deqn{
  v(\hat{Y}) = \hat{V}_1
}
This is the variance estimator used in the 'survey' package when the user specifies
\code{options(survey.ultimate.cluster = TRUE)} or uses \code{svyrecvar(..., one.stage = TRUE)}.
When the first-stage sampling fractions are small, analysts often omit the finite population corrections \eqn{(1-\frac{n_h}{N_h})}
when using the ultimate cluster estimator.
\cr \cr
The \strong{SD1} and \strong{SD2} variance estimators are "successive difference"
estimators sometimes used for systematic sampling designs.
Ash (2014) describes each estimator as follows:
\deqn{
  \hat{v}_{S D 1}(\hat{Y}) = \left(1-\frac{n}{N}\right) \frac{n}{2(n-1)} \sum_{k=2}^n\left(\breve{y}_k-\breve{y}_{k-1}\right)^2
}
\deqn{
  \hat{v}_{S D 2}(\hat{Y}) = \left(1-\frac{n}{N}\right) \frac{1}{2}\left[\sum_{k=2}^n\left(\breve{y}_k-\breve{y}_{k-1}\right)^2+\left(\breve{y}_n-\breve{y}_1\right)^2\right]
}
where \eqn{\breve{y}_k = y_k/\pi_k} is the weighted value of unit \eqn{k}
with selection probability \eqn{\pi_k}. The SD1 estimator is recommended by Wolter (1984).
The SD2 estimator is the basis of the successive difference replication estimator commonly
used for systematic sampling designs. See Ash (2014) for details.
\cr \cr
For multistage samples, SD1 and SD2 are applied to the clusters at each stage, separately by stratum.
For later stages of sampling, the variance estimate from a stratum is multiplied by the product
of sampling fractions from earlier stages of sampling. For example, at a third stage of sampling,
the variance estimate from a third-stage stratum is multiplied by \eqn{\frac{n_1}{N_1}\frac{n_2}{N_2}},
which is the product of sampling fractions from the first-stage stratum and second-stage stratum.
\cr \cr
The \strong{"Deville-1"} and \strong{"Deville-2"} variance estimators
are clearly described in Matei and Tillé (2005),
and are intended for designs that use
fixed-size, unequal-probability random sampling without replacement.
These variance estimators have been shown to be effective
for designs that use a fixed sample size with a high-entropy sampling method.
This includes most PPSWOR sampling methods,
but unequal-probability systematic sampling is an important exception.

These variance estimators take the following form:
\deqn{
\hat{v}(\hat{Y}) = \sum_{i=1}^{n} c_i (\breve{y}_i - \frac{1}{\sum_{k=1}^{n}c_k}\sum_{k=1}^{n}c_k \breve{y}_k)^2
}
where \eqn{\breve{y}_i = y_i/\pi_i} is the weighted value of the variable of interest,
and \eqn{c_i} depend on the method used:
\itemize{
\item{\strong{"Deville-1"}: }{
\deqn{c_i=\left(1-\pi_i\right) \frac{n}{n-1}}}
\item{\strong{"Deville-2"}: }{
\deqn{c_i = (1-\pi_i) \left[1 - \sum_{k=1}^{n} \left(\frac{1-\pi_k}{\sum_{l=1}^{n}(1-\pi_l)}\right)^2 \right]^{-1}}}
}
In the case of simple random sampling without replacement (SRSWOR),
these estimators are both identical to the usual stratified multistage SRS estimator
(which is itself a special case of the Horvitz-Thompson estimator).

For multistage samples, "Deville-1" and "Deville-2" are applied to the clusters at each stage, separately by stratum.
For later stages of sampling, the variance estimate from a stratum is multiplied by the product
of sampling probabilities from earlier stages of sampling. For example, at a third stage of sampling,
the variance estimate from a third-stage stratum is multiplied by \eqn{\pi_1 \times \pi_{(2 | 1)}},
where \eqn{\pi_1} is the sampling probability of the first-stage unit
and \eqn{\pi_{(2|1)}} is the sampling probability of the second-stage unit
within the first-stage unit.
}

\section{Two-Phase Designs}{

For a two-phase design, \code{variance_estimator} should be a list of variance estimators' names,
with two elements, such as \code{list('Ultimate Cluster', 'Poisson Horvitz-Thompson')}.
In two-phase designs, only the following estimators may be used for the second phase:
\itemize{
  \item "Ultimate Cluster"
  \item "Stratified Multistage SRS"
  \item "Poisson Horvitz-Thompson"
}
For statistical details on the handling of two-phase designs,
see the documentation for \link[svrep]{make_twophase_quad_form}.
}

\examples{
\dontrun{
# Example 1: Quadratic form for successive-difference variance estimator ----
# NOTE: svydesign(), svytotal(), svyglm(), and twophase() are called below
# without a namespace prefix, so the survey package must be attached
# (for instance with library(survey)) before running these examples.

   data('library_stsys_sample', package = 'svrep')

   ## First, ensure data are sorted in same order as was used in sampling
   ## (the SD1 and SD2 estimators are based on differences between
   ## consecutive units, so the row order affects the result)
   library_stsys_sample <- library_stsys_sample[
     order(library_stsys_sample$SAMPLING_SORT_ORDER),
   ]

   ## Create a survey design object
   ## (stratified by SAMPLING_STRATUM, no clustering, with finite
   ## population sizes taken from STRATUM_POP_SIZE)
   design_obj <- svydesign(
     data = library_stsys_sample,
     strata = ~ SAMPLING_STRATUM,
     ids = ~ 1,
     fpc = ~ STRATUM_POP_SIZE
   )

   ## Obtain quadratic form
   ## (the circular successive-differences estimator, "SD2")
   quad_form_matrix <- get_design_quad_form(
     design = design_obj,
     variance_estimator = "SD2"
   )

   ## Estimate variance of estimated population total
   ## The quadratic form is applied to the weighted values of the variable,
   ## with missing weighted values replaced by zero.
   y <- design_obj$variables$LIBRARIA
   wts <- weights(design_obj, type = 'sampling')
   y_wtd <- as.matrix(y) * wts
   y_wtd[is.na(y_wtd)] <- 0

   pop_total <- sum(y_wtd)

   ## Variance estimate: transpose(y_wtd) times the matrix times y_wtd
   var_est <- t(y_wtd) \%*\% quad_form_matrix \%*\% y_wtd
   std_error <- sqrt(var_est)

   print(pop_total); print(std_error)

   # Compare to the estimate from svytotal(), which uses the stratified
   # design exactly as specified in svydesign() above
   svytotal(x = ~ LIBRARIA, na.rm = TRUE,
            design = design_obj)

# Example 2: Two-phase design (second phase is nonresponse) ----

  ## Estimate response propensities, separately by stratum
  ## (the native pipe operator used below requires R 4.1.0 or later)
  library_stsys_sample[['RESPONSE_PROB']] <- svyglm(
    design = design_obj,
    formula = I(RESPONSE_STATUS == "Survey Respondent") ~ SAMPLING_STRATUM,
    family = quasibinomial('logistic')
  ) |> predict(type = 'response')

  ## Create a survey design object,
  ## where nonresponse is treated as a second phase of sampling
  ## (each list argument has one element per phase; the second-phase
  ## selection probabilities are the estimated response propensities)
  twophase_design <- twophase(
    data = library_stsys_sample,
    strata = list(~ SAMPLING_STRATUM, NULL),
    id = list(~ 1, ~ 1),
    fpc = list(~ STRATUM_POP_SIZE, NULL),
    probs = list(NULL, ~ RESPONSE_PROB),
    subset = ~ I(RESPONSE_STATUS == "Survey Respondent")
  )

  ## Obtain quadratic form for the two-phase variance estimator,
  ## where first phase variance contribution estimated
  ## using the successive differences estimator
  ## and second phase variance contribution estimated
  ## using the Horvitz-Thompson estimator
  ## (with joint probabilities based on assumption of Poisson sampling)
  ## For a two-phase design, variance_estimator is a list with two elements:
  ## the first for phase one, the second for phase two.
  get_design_quad_form(
    design = twophase_design,
    variance_estimator = list(
      "SD2",
      "Poisson Horvitz-Thompson"
    )
  )
}
}
\references{
- Ash, S. (2014). "\emph{Using successive difference replication for estimating variances}."
\strong{Survey Methodology}, Statistics Canada, 40(1), 47–59.
\cr \cr
- Beaumont, Jean-François, and Zdenek Patak. (2012). "\emph{On the Generalized Bootstrap for Sample Surveys with Special Attention to Poisson Sampling: Generalized Bootstrap for Sample Surveys.}"
\strong{International Statistical Review} 80 (1): 127–48.
\cr \cr
- Bellhouse, D.R. (1985). "\emph{Computing Methods for Variance Estimation in Complex Surveys}."
\strong{Journal of Official Statistics}, Vol.1, No.3.
\cr \cr
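- Matei, Alina, and Yves Tillé. (2005). "\emph{Evaluation of Variance Approximations and Estimators in Maximum Entropy Sampling with Unequal Probability and Fixed Sample Size.}"
\strong{Journal of Official Statistics}, 21(4): 543–70.
\cr \cr
- Särndal, C.-E., Swensson, B., & Wretman, J. (1992). "\emph{Model Assisted Survey Sampling}." Springer New York.
\cr \cr
- Wolter, K.M. (1984). "\emph{An Investigation of Some Estimators of Variance for Systematic Sampling}."
\strong{Journal of the American Statistical Association}, 79(388), 781–790.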
}
