% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mosalloc.R
\name{mosalloc}
\alias{mosalloc}
\title{Multiobjective sample allocation for constrained multivariate and
multidomain optimal allocation in survey sampling}
\usage{
mosalloc(
  D,
  d,
  A = NULL,
  a = NULL,
  C = NULL,
  c = NULL,
  l = 2,
  u = NULL,
  opts = list(sense = "max_precision", f = NULL, df = NULL, Hf = NULL, init_w = 1,
    mc_cores = 1L, pm_tol = 1e-05, max_iters = 100L, print_pm = FALSE)
)
}
\arguments{
\item{D}{(type: \code{matrix})
The objective matrix. A matrix of either precision or cost units.}

\item{d}{(type: \code{vector})
The objective vector. A vector of either fixed precision components
(e.g. finite population corrections) or fixed costs.}

\item{A}{(type: \code{matrix})
A matrix of precision units for precision constraints.}

\item{a}{(type: \code{vector})
The right-hand side vector of the precision constraints.}

\item{C}{(type: \code{matrix})
A matrix of cost coefficients for cost constraints.}

\item{c}{(type: \code{vector})
The right-hand side vector of the cost constraints.}

\item{l}{(type: \code{vector})
A vector of lower box constraints.}

\item{u}{(type: \code{vector})
A vector of upper box constraints.}

\item{opts}{(type: \code{list})
The options used by the algorithms:
\cr \code{$sense} (type: \code{character}) Sense of optimization
(default = \code{"max_precision"}, alternative \code{"min_cost"}).
\cr \code{$f} (type: \code{function}) Decision functional over the objective
vector (default = \code{NULL}).
\cr \code{$df} (type: \code{function})
The gradient of f (default = \code{NULL}).
\cr \code{$Hf} (type: \code{function})
The Hesse matrix of f (default = \code{NULL}).
\cr \code{$init_w} (type: \code{numeric} or \code{matrix})
Preference weightings (default = \code{1}; the weight for the first objective
component must be 1).
\cr \code{$mc_cores} (type: \code{integer})
The number of cores for parallelizing multiple input weightings stacked
rowwise (default = \code{1L}).
\cr \code{$pm_tol} (type: \code{numeric})
The tolerance for the projection method (default = \code{1e-5}).
\cr \code{$max_iters} (type: \code{integer})
The maximum number of iterations (default = \code{100L}).
\cr \code{$print_pm} (type: \code{logical}) A \code{TRUE} or \code{FALSE}
statement whether iterations of the projection method should be printed
(default = \code{FALSE}).}
}
\value{
\code{mosalloc()} returns a list containing the following
components:

\code{$w} The initial preference weighting \code{opts$init_w}.

\code{$n} The vector of optimal sample sizes.

\code{$J} The optimal objective vector.

\code{$Objective} The objective value with respect to decision
functional f. \code{NULL} if \code{opts$f = NULL}.

\code{$Utopian} The component-wise univariate optimal
objective vector. \code{NULL} if \code{opts$f = NULL}.

\code{$Normal} The vector normal to the Pareto frontier at
\code{$J}.

\code{$dfJ} The gradient of \code{opts$f} evaluated at \code{$J}.

\code{$Sensitivity} The dual variables of the objectives and
constraints.

\code{$Qbounds} The Quality bounds of the Lorentz cones.

\code{$Dbounds} The weighted objective constraints ($w * $J).

\code{$Scalepar} An internal scaling parameter.

\code{$Ecosolver} A list of ECOSolveR returns including:
\cr \code{...$Ecoinfostring} The info string of
\code{ECOSolveR::ECOS_csolve()}.
\cr \code{...$Ecoredcodes} The return codes (\code{retcodes}) of \code{ECOSolveR::ECOS_csolve()}.
\cr \code{...$Ecosummary} Problem summary of \code{ECOSolveR::ECOS_csolve()}.

\code{$Timing} Run time info.

\code{$Iteration} A list of internal iterates. \code{NULL} if
\code{opts$f = NULL}.
}
\description{
Computes solutions to standard sample allocation problems
under various precision and cost restrictions. The input data is
transformed and passed to the Embedded COnic Solver (ECOS) from the
'ECOSolveR' package. Multiple survey purposes can be optimized
simultaneously through a weighted Chebyshev minimization. Note that in
the case of multiple objectives, \code{mosalloc()} does not necessarily
lead to Pareto optimality. This highly depends on the problem structure.
A strong indicator for Pareto optimality is when the weighted objective
values given by \code{Dbounds} are constant over all objective
components or when all components of \code{Qbounds} equal \code{1}.
In addition, \code{mosalloc()} can handle twice-differentiable convex
decision functionals (in which case Pareto optimality is ensured).
\code{mosalloc()} returns dual variables, enabling a detailed sensitivity
analysis.
}
\note{
\strong{Precision optimization} \emph{(\code{opts$sense == "max_precision"},
\code{opts$f == NULL})}

The mathematical problem solved is
\deqn{\min_{n, z, t}\{t: Dz-d\leq w^{-1}t, Az\leq a, Cn\leq c,
1\leq n_iz_i\, \forall i, l\leq n \leq u\}}
with \eqn{\textbf{1}\leq l\leq u \leq N}, where
\itemize{
\item \eqn{n,z\in\mathbb{R}^m} and \eqn{t\in\mathbb{R}} are the
optimization variables,
\item \eqn{D\in\{M\in\mathbb{R}^{k_D\times m}: \sum_i M_{ij}>0\, \forall j,
\sum_j M_{ij}>0\, \forall i\}} a matrix of nonnegative objective precision
components with \eqn{k_D} the number of precision objectives
(the number of variables of interest),
\item \eqn{d\in\mathbb{R}^{k_D}} the objective right-hand side (RHS), e.g.
the finite population correction (fpc),
\item \eqn{A\in\mathbb{R}^{k_A\times m}} a nonnegative precision matrix with
\eqn{k_A} the number of precision constraints,
\item \eqn{a\in \mathbb{R}^{k_A}} the corresponding RHS, e.g. fpc + (upper bound to
the coefficient of variation)^2,
\item \eqn{C\in\mathbb{R}^{k_C\times m}} a cost matrix with \eqn{k_C} the number
of cost constraints,
\item \eqn{c\in\mathbb{R}^{k_C}} the corresponding RHS,
\item \eqn{l,u\in\mathbb{R}^m} the bounds to the sample size vector \eqn{n},
\item \eqn{N\in\mathbb{N}^m} the vector of population sizes, and
\item \eqn{w} a given strictly positive preference weighting.
}

Special cases of this formulation are
\itemize{
\item Neyman-Tschuprow allocation (Neyman, 1934 and Tschuprow, 1923):
\deqn{\min_n \Big\{\sum_{h=1}^H
\Big(\frac{N_h^2S_h^2}{n_h}-N_hS_h^2\Big):\sum_{h=1}^H n_h \leq c\Big\}
\quad \Leftrightarrow \quad \min_{n, z, t}\{t: Dz-d\leq t, Cn\leq c,
1\leq n_iz_i\, \forall i\}} with \eqn{D = (N_1^2S_1^2,\dots,N_H^2S_H^2)},
\eqn{d = \sum_{h=1}^H N_hS_h^2}, \eqn{C = (1,\dots,1)} and \eqn{c} a
maximum sample size. Here, \eqn{H} is the number of strata, \eqn{N_h}
the size of stratum \eqn{h} and \eqn{S_h^2} the variance of the variable
of interest in stratum \eqn{h}.
\item box-constrained optimal allocation:
\deqn{\min_{n, z, t}\{t: Dz-d\leq t, Cn\leq c,
1\leq n_iz_i\, \forall i, l\leq n \leq u\}} with
\eqn{D = (N_1^2S_1^2,\dots,N_H^2S_H^2)},
\eqn{d = \sum_{h=1}^H N_hS_h^2}, \eqn{C = (1,\dots,1)} and \eqn{c} a
maximum sample size (cf. Srikantan, 1963 and Münnich et al., 2012).
Here, \eqn{l} and \eqn{u} are bounds to the optimal sample size vector.
\item cost and precision constrained univariate optimal allocation:
\deqn{\min_{n, z, t}\{t: Dz-d\leq t, Az\leq a, Cn\leq c,
1\leq n_iz_i\, \forall i, l\leq n \leq u\}} with \eqn{k_D = 1}
(cf. Willems, 2025, Chapter 3).
\item multivariate optimal allocation with weighted sum scalarization:
\deqn{\min_{n, z, t}\{t: w^\top Dz-w^\top d\leq t, Az\leq a, Cn\leq c,
1\leq n_iz_i\, \forall i, l\leq n \leq u\}} where \eqn{w\in\mathbb{R}^{k_D}}
is a strictly positive preference weighting (cf. Folks and Antle, 1965,
and Rupp, 2018). Note that for this case the problem reduces to cost and
precision constrained univariate optimal allocation. Solutions are ensured
to be optimal in the Pareto sense.
\item box-constrained two-stage cluster sampling:
\deqn{\quad \quad \quad\min_{n_\textbf{I},n_\textbf{II}}
\Big\{\Big(\frac{N_\textbf{I}^2
S_\textbf{I}^2}{n_\textbf{I}}-N_\textbf{I}S_\textbf{I}^2\Big) +
\frac{N_\textbf{I}}{n_\textbf{I}}\sum_{j=1}^{N_\textbf{I}}
\Big(\frac{N_{\textbf{II}j}^2S_{\textbf{II}j}^2}{n_{\textbf{II}j}}-
N_{\textbf{II}j}S_{\textbf{II}j}^2\Big):
c_{\textbf{I}}n_\textbf{I} + \frac{n_\textbf{I}}{N_\textbf{I}}
\sum_{j=1}^{N_\textbf{I}}c_{\textbf{II}j}n_{\textbf{II}j} \leq
c_\textrm{max},\hspace{3.5cm}}
\deqn{\hspace{9.1cm} l_\textbf{I}\leq n_\textbf{I}\leq u_\textbf{I},
l_\textbf{II}\leq n_\textbf{II}\leq u_\textbf{II} \Big\}}
\deqn{\Leftrightarrow \quad \min_{n, z, t}\{t: Dz-d\leq t, Cn\leq c,
1\leq n_iz_i\, \forall i, l\leq n \leq u\}\hspace{4cm}} with
\eqn{D = (N_\textbf{I}^2S_\textbf{I}^2 -
N_\textbf{I}\sum_{j=1}^{N_\textbf{I}} N_{\textbf{II}j}S_{\textbf{II}j}^2,
N_\textbf{I}N_{\textbf{II}1}^2 S_{\textbf{II}1}^2,\dots,
N_\textbf{I}N_{\textbf{II}N_\textbf{I}}^2S_{\textbf{II}N_\textbf{I}}^2)},
\eqn{d = N_\textbf{I}S_\textbf{I}^2},
\eqn{C = [C_1, C_2]}, where
\eqn{C_1=(c_\textbf{I},l_\textbf{II}^\top,-u_\textbf{II}^\top)^\top} and
\eqn{C_2 = [N_\textbf{I}^{-1}c_\textbf{II}^\top;-\textbf{I};\textbf{I}]}
(\eqn{\textbf{I}} is the identity matrix),
\eqn{c = (c_\textrm{max},0,\dots,0)^\top},
where \eqn{l = (l_\textbf{I},l_\textbf{I}l_\textbf{II}^\top)^\top} and
\eqn{u = (u_\textbf{I},u_\textbf{I}u_\textbf{II}^\top)^\top}
(cf. Willems, 2025, Chapter 3).
Here, \eqn{N_\textbf{I}} is the number of
clusters, \eqn{N_{\textbf{II}j}} the size of cluster \eqn{j},
\eqn{S_\textbf{I}^2} the between cluster variance and
\eqn{S_{\textbf{II}j}^2} the within cluster variances
of the variable of interest. Furthermore, \eqn{c_\textrm{max}} is a
maximum expected cost, \eqn{c_\textbf{I}} a variable cost for sampling
one cluster, and \eqn{c_{\textbf{II}j}} a variable cost for sampling
one unit in cluster \eqn{j}. The optimal number of clusters to be drawn
and the optimal sample sizes are given through \eqn{n = (n_\textbf{I},
n_\textbf{I}n_{\textbf{II}1},\dots, n_\textbf{I}n_{\textbf{II}
N_\textbf{I}})^\top}.
}

For the special cases above, solutions are unique and, thus, Pareto optimal.
For the general multiobjective problem formulation this is not the case.
However, a strong indicator for uniqueness of solutions is
\eqn{n_iz_i = 1\, \forall i} (\code{Qbounds}) or
\eqn{Dz-d = w^{-1}t} (\code{Dbounds}). Uniqueness can be ensured via a
stepwise procedure implemented in \code{mosallocStepwiseFirst()}.\cr

\strong{Precision optimization} \emph{(\code{opts$sense == "max_precision"},
\code{opts$f ==} \eqn{f}, \code{opts$df ==} \eqn{\nabla f},
\code{opts$Hf ==} \eqn{Hf})}

The mathematical problem solved is
\deqn{\min_{n, z}\{f(Dz-d): Az\leq a, Cn\leq c,
1\leq n_iz_i\, \forall i, l\leq n \leq u\}}
with components as specified above and where
\eqn{f:\mathbb{R}^{k_D}\rightarrow \mathbb{R}, x \mapsto f(x)}
is a twice-differentiable convex decision functional.
E.g. a \eqn{p}-norm \eqn{f(x) = \lVert x \rVert_p} with
\eqn{p\in\mathbb{N}}.\cr

\strong{Cost optimization} \emph{(\code{opts$sense == "min_cost"})}

The mathematical problem solved is
\deqn{\min_{n, z, t}\{t: Dn-d\leq\textbf{1}t, Az\leq a, Cn\leq c,
1\leq n_iz_i\, \forall i, l\leq n \leq u\}}
with \eqn{\textbf{1}\leq l\leq u \leq N}. Hence, the only difference to
precision optimization is the type of objective constraint
\eqn{Dn-d\leq\textbf{1}t}.

Special cases of this formulation are
\itemize{
\item the cost optimal allocation (possibly multivariate, i.e. \eqn{k_A\geq 2}):
\deqn{\min_{n, z, t}\{t: Dn-d\leq\textbf{1}t, Az\leq a, 1\leq n_iz_i\,
\forall i\}} where \eqn{D^\top} is a vector of stratum-specific sampling
cost and \eqn{d} some fixed cost.
}
}
\examples{
# Artificial population of 50 568 business establishments and 5 business
# sectors (data from Valliant, R., Dever, J. A., & Kreuter, F. (2013).
# Practical tools for designing and weighting survey samples. Springer.
# https://doi.org/10.1007/978-1-4614-6449-5, Example 5.2, pages 133-139)

# See also <https://umd.app.box.com/s/9yvvibu4nz4q6rlw98ac/file/297813512360>
# file: Code 5.3 constrOptim.example.R

# The objects defined in this section are shared by Examples 1 to 8.
# Every vector holds one value per stratum (5 strata).
Nh <- c(6221, 11738, 4333, 22809, 5467) # stratum sizes
ch <- c(120, 80, 80, 90, 150) # stratum-specific cost of surveying

# Revenues
mh.rev <- c(85, 11, 23, 17, 126) # mean revenue
Sh.rev <- c(170.0, 8.8, 23.0, 25.5, 315.0) # standard deviation revenue

# Employees
mh.emp <- c(511, 21, 70, 32, 157) # mean number of employees
Sh.emp <- c(255.50, 5.25, 35.00, 32.00, 471.00) # std. dev. employees

# Proportion of establishments claiming research credit
ph.rsch <- c(0.8, 0.2, 0.5, 0.3, 0.9)

# Proportion of establishments with offshore affiliates
ph.offsh <- c(0.06, 0.03, 0.03, 0.21, 0.77)

budget <- 300000 # overall available budget
n.min  <- 100 # minimum stratum-specific sample size

# Examples
#----------------------------------------------------------------------------
# Example 1: Minimization of the variation of estimates for revenue subject
# to cost restrictions and precision restrictions to the coefficient of
# variation of estimates for the proportion of businesses with offshore
# affiliates.

l <- rep(n.min, 5) # minimum sample size per stratum
u <- Nh            # maximum sample size per stratum
# Cost constraints have the form C n <= c. The second row uses negated costs
# so that the lower budget bound for strata 1-3 can be written in this form.
C <- rbind(ch,
           ch * c(-1, -1, -1, 0, 0))
c <- c(budget,                        # Maximum overall survey budget
       - 0.5 * budget)                # Minimum overall budget for strata 1-3

# We require at maximum 5 \% relative standard error for estimates of
# proportion of businesses with offshore affiliates
A <- matrix(ph.offsh * (1 - ph.offsh) * Nh**3/(Nh - 1)/sum(Nh * ph.offsh)**2,
 nrow = 1)
# Right-hand side: finite population correction plus the squared bound
a <- sum(ph.offsh * (1 - ph.offsh) * Nh**2/(Nh - 1)
)/sum(Nh * ph.offsh)**2 + 0.05**2

D <- matrix(Sh.rev**2 * Nh**2, nrow = 1) # objective variance components
d <- sum(Sh.rev**2 * Nh) # finite population correction

# All options are set to their default values
opts = list(sense = "max_precision",
            f = NULL, df = NULL, Hf = NULL,
            init_w = 1,
            mc_cores = 1L, pm_tol = 1e-05,
            max_iters = 100L, print_pm = FALSE)

sol <- mosalloc(D = D, d = d, A = A, a = a, C = C, c = c, l = l, u = u,
                opts = opts)

# Check solution statement of the internal solver to verify feasibility
sol$Ecosolver$Ecoinfostring # [1] "Optimal solution found"

# Check constraints
c(C[1, ] \%*\% sol$n) # [1] 3e+05
c(C[2, ] \%*\% sol$n) # [1] -150000
c(sqrt(A \%*\% (1 / sol$n) - A \%*\% (1 / Nh))) # 5 \% rel. std. err.

#----------------------------------------------------------------------------
# Example 2: Minimization of the maximum relative variation of estimates for
# the total revenue, the number of employees, the number of businesses
# claiming research credit, and the number of businesses with offshore
# affiliates subject to cost restrictions

l <- rep(n.min, 5) # minimum sample size per stratum
u <- Nh            # maximum sample size per stratum
C <- rbind(ch, ch * c(-1, -1, -1, 0, 0))
c <- c(budget, - 0.5 * budget)
A <- NULL # no precision constraint
a <- NULL # no precision constraint

# Precision components (Variance / Totals^2) for multidimensional objective
# (one row per variable of interest, one column per stratum)
D <- rbind(Sh.rev**2 * Nh**2/sum(Nh * mh.rev)**2,
           Sh.emp**2 * Nh**2/sum(Nh * mh.emp)**2,
           ph.rsch * (1 - ph.rsch) * Nh**3/(Nh - 1)/sum(Nh * ph.rsch)**2,
           ph.offsh * (1 - ph.offsh) * Nh**3/(Nh - 1)/sum(Nh * ph.offsh)**2)

d <- as.vector(D \%*\% (1 / Nh)) # finite population correction

opts = list(sense = "max_precision",
            f = NULL, df = NULL, Hf = NULL,
            init_w = 1,
            mc_cores = 1L, pm_tol = 1e-05,
            max_iters = 100L, print_pm = FALSE)

sol <- mosalloc(D = D, d = d, C = C, c = c, l = l, u = u, opts = opts)

# Obtain optimal objective value
sol$J # [1] 0.0017058896 0.0004396972 0.0006428475 0.0017058896

# Obtain corresponding normal vector
sol$Normal # [1] 6.983113e-01 1.337310e-11 1.596167e-11 3.016887e-01

# => Revenue and offshore affiliates are dominating the solution with a
#    ratio of approximately 2:1 (sol$Normal[1] / sol$Normal[4])

#----------------------------------------------------------------------------
# Example 3: Example 2 with preference weighting

# One weight per objective component (row of D); the first weight is 1
w <- c(1, 3.85, 3.8, 1.3) # preference weighting
l <- rep(n.min, 5) # minimum sample size per stratum
u <- Nh            # maximum sample size per stratum
C <- rbind(ch, ch * c(-1, -1, -1, 0, 0))
c <- c(budget, - 0.5 * budget)
A <- NULL # no precision constraint
a <- NULL # no precision constraint

# Precision components as in Example 2
D <- rbind(Sh.rev**2 * Nh**2/sum(Nh * mh.rev)**2,
           Sh.emp**2 * Nh**2/sum(Nh * mh.emp)**2,
           ph.rsch * (1 - ph.rsch) * Nh**3/(Nh - 1)/sum(Nh * ph.rsch)**2,
           ph.offsh * (1 - ph.offsh) * Nh**3/(Nh - 1)/sum(Nh * ph.offsh)**2)

d <- as.vector(D \%*\% (1 / Nh)) # finite population correction

opts = list(sense = "max_precision",
            f = NULL, df = NULL, Hf = NULL,
            init_w = w,
            mc_cores = 1L, pm_tol = 1e-05,
            max_iters = 100L, print_pm = FALSE)

mosalloc(D = D, d = d, C = C, c = c, l = l, u = u, opts = opts)

#----------------------------------------------------------------------------
# Example 4: Example 2 with multiple preference weightings for simultaneous
# evaluation

# Each row of w is one preference weighting over the four objectives
w <- matrix(c(1.0, 1.0, 1.0, 1.0,       # matrix of preference weightings
              1.0, 3.9, 3.9, 1.3,
              0.8, 4.2, 4.8, 1.5,
              1.2, 3.5, 4.8, 2.0,
              2.0, 1.0, 1.0, 2.0), 5, 4, byrow = TRUE)
# Divide every row by its first entry
w <- w / w[,1]     # rescale w (ensure the first weighting to be one)
l <- rep(n.min, 5) # minimum sample size per stratum
u <- Nh            # maximum sample size per stratum
C <- rbind(ch, ch * c(-1, -1, -1, 0, 0))
c <- c(budget, - 0.5 * budget)
A <- NULL # no precision constraint
a <- NULL # no precision constraint

# Precision components as in Example 2
D <- rbind(Sh.rev**2 * Nh**2/sum(Nh * mh.rev)**2,
           Sh.emp**2 * Nh**2/sum(Nh * mh.emp)**2,
           ph.rsch * (1 - ph.rsch) * Nh**3/(Nh - 1)/sum(Nh * ph.rsch)**2,
           ph.offsh * (1 - ph.offsh) * Nh**3/(Nh - 1)/sum(Nh * ph.offsh)**2)

d <- as.vector(D \%*\% (1 / Nh)) # finite population correction

opts = list(sense = "max_precision",
            f = NULL, df = NULL, Hf = NULL,
            init_w = w,
            mc_cores = 1L, pm_tol = 1e-05,
            max_iters = 100L, print_pm = FALSE)

# sols is processed as a list of solutions (presumably one per row of w)
sols <- mosalloc(D = D, d = d, C = C, c = c, l = l, u = u, opts = opts)
# Extract the quality bounds of every solution
lapply(sols, function(sol){sol$Qbounds})

#----------------------------------------------------------------------------
# Example 5: Example 2 where a weighted sum scalarization of the objective
# components is minimized

l <- rep(n.min, 5) # minimum sample size per stratum
u <- Nh            # maximum sample size per stratum
C <- matrix(ch, nrow = 1) # a single overall cost constraint
c <- budget
A <- NULL # no precision constraint
a <- NULL # no precision constraint

# Objective variance components
D <- rbind(Sh.rev**2 * Nh**2/sum(Nh * mh.rev)**2,
           Sh.emp**2 * Nh**2/sum(Nh * mh.emp)**2,
           ph.rsch * (1 - ph.rsch) * Nh**3/(Nh - 1)/sum(Nh * ph.rsch)**2,
           ph.offsh * (1 - ph.offsh) * Nh**3/(Nh - 1)/sum(Nh * ph.offsh)**2)

d <- as.vector(D \%*\% (1 / Nh)) # finite population correction

# Simple weighted sum as decision functional
wss <- c(1, 1, 0.5, 0.5) # preference weighting (weighted sum scalarization)

# Collapse the four objectives into one weighted objective row
Dw <- wss \%*\% D
dw <- as.vector(wss \%*\% d)

opts = list(sense = "max_precision",
            f = NULL, df = NULL, Hf = NULL,
            init_w = 1,
            mc_cores = 1L, pm_tol = 1e-05,
            max_iters = 1000L, print_pm = FALSE)

# Solve weighted sum scalarization (WSS) via mosalloc
sol_wss <- mosalloc(D = Dw, d = dw, C = C, c = c, l = l, u = u, opts = opts)

# Obtain optimal objective values
J <- D \%*\% (1 / sol_wss$n) - d

# Reconstruct solution via a weighted Chebyshev minimization
# (weights chosen such that wcm * J is constant over all components)
wcm <- J[1] / J
opts = list(sense = "max_precision",
            f = NULL, df = NULL, Hf = NULL,
            init_w = matrix(wcm, 1),
            mc_cores = 1L, pm_tol = 1e-05,
            max_iters = 1000L, print_pm = FALSE)

sol_wcm <- mosalloc(D = D, d = d, C = C, c = c, l = l, u = u, opts = opts)

# Compare solutions
rbind(t(J), sol_wcm$J)
#            [,1]         [,2]         [,3]        [,4]
# [1,] 0.00155645 0.0004037429 0.0005934474 0.001327165
# [2,] 0.00155645 0.0004037429 0.0005934474 0.001327165

rbind(sol_wss$n, sol_wcm$n)
#          [,1]     [,2]     [,3]     [,4]     [,5]
# [1,] 582.8247 236.6479 116.7866 839.5988 841.4825
# [2,] 582.8226 236.6475 116.7871 839.5989 841.4841

# The rescaled normal vector approximately recovers the weights wss
rbind(wss, sol_wcm$Normal / sol_wcm$Normal[1])
#    [,1]      [,2]      [,3]      [,4]
#wss    1 1.0000000 0.5000000 0.5000000
#       1 0.9976722 0.4997552 0.4997462

#----------------------------------------------------------------------------
# Example 6: Example 1 with two subpopulations and a p-norm as decision
# functional

l <- rep(n.min, 5) # minimum sample size per stratum
u <- Nh            # maximum sample size per stratum
C <- rbind(ch, ch * c(-1, -1, -1, 0, 0))
c <- c(budget, - 0.5 * budget)

# At maximum 5 \% relative standard error for estimates of proportion of
# businesses with offshore affiliates
A <- matrix(ph.offsh * (1 - ph.offsh) * Nh**3/(Nh - 1)/sum(Nh * ph.offsh)**2,
 nrow = 1)
a <- sum(ph.offsh * (1 - ph.offsh) * Nh**2/(Nh - 1)
)/sum(Nh * ph.offsh)**2 + 0.05**2

# One row per subpopulation: strata 3-4 (row 1) and strata 1, 2, 5 (row 2)
D <- rbind((Sh.rev**2 * Nh**2)*c(0,0,1,1,0),
           (Sh.rev**2 * Nh**2)*c(1,1,0,0,1))# objective variance components
d <- as.vector(D \%*\% (1 / Nh)) # finite population correction

# p-norm solution
# f is the sum of p-th powers of the objective components; df and Hf are
# its gradient and its (diagonal) Hessian matrix
p <- 5 # p-norm
opts = list(sense = "max_precision",
            f = function(x) sum(x**p),
            df = function(x) p * x**(p - 1),
            Hf = function(x) diag(p * (p - 1) * x**(p - 2)),
            init_w = 1,
            mc_cores = 1L, pm_tol = 1e-05,
            max_iters = 1000L, print_pm = TRUE)

# NOTE(review): A and a are defined above but are not passed to mosalloc()
# in this call, so no precision constraint is supplied -- confirm intended.
sol <- mosalloc(D = D, d = d, C = C, c = c, l = l, u = u, opts = opts)

# Ratios close to 1 mean that sol$Normal is proportional to sol$dfJ
c(sol$Normal/sol$dfJ)/mean(c(sol$Normal/sol$dfJ))
# [1] 0.9999972 1.0000028

#----------------------------------------------------------------------------
# Example 7: Example 2 with p-norm as decision functional and only one
# overall cost constraint

l <- rep(n.min, 5) # minimum sample size per stratum
u <- Nh            # maximum sample size per stratum
C <- matrix(ch, nrow = 1) # a single overall cost constraint
c <- budget
A <- NULL # no precision constraint
a <- NULL # no precision constraint

# Objective precision components
D <- rbind(Sh.rev**2 * Nh**2/sum(Nh * mh.rev)**2,
           Sh.emp**2 * Nh**2/sum(Nh * mh.emp)**2,
           ph.rsch * (1 - ph.rsch) * Nh**3/(Nh - 1)/sum(Nh * ph.rsch)**2,
           ph.offsh * (1 - ph.offsh) * Nh**3/(Nh - 1)/sum(Nh * ph.offsh)**2)

d <- as.vector(D \%*\% (1 / Nh)) # finite population correction

# p-norm solution
# f is the sum of p-th powers of the objective components; df and Hf are
# its gradient and its (diagonal) Hessian matrix
p <- 5 # p-norm
opts = list(sense = "max_precision",
            f = function(x) sum(x**p),
            df = function(x) p * x**(p - 1),
            Hf = function(x) diag(p * (p - 1) * x**(p - 2)),
            init_w = 1,
            mc_cores = 1L, pm_tol = 1e-05,
            max_iters = 1000L, print_pm = TRUE)

sol <- mosalloc(D = D, d = d, C = C, c = c, l = l, u = u, opts = opts)

# Ratios close to 1 mean that sol$Normal is proportional to sol$dfJ
c(sol$Normal/sol$dfJ)/mean(c(sol$Normal/sol$dfJ))
# [1] 1.0014362 0.9780042 1.0197807 1.0007789

#----------------------------------------------------------------------------
# Example 8: Minimization of sample sizes subject to precision constraints

l <- rep(n.min, 5) # minimum sample size per stratum
u <- Nh            # maximum sample size per stratum

# We require at maximum 4.66 \% relative standard error for the estimate of
# total revenue, 5 \% for the number of employees, 3 \% for the proportion of
# businesses claiming research credit, and 3 \% for the proportion of
# businesses with offshore affiliates
A <- rbind(Sh.rev**2 * Nh**2/sum(Nh * mh.rev)**2,
           Sh.emp**2 * Nh**2/sum(Nh * mh.emp)**2,
           ph.rsch * (1 - ph.rsch) * Nh**3/(Nh - 1)/sum(Nh * ph.rsch)**2,
           ph.offsh * (1 - ph.offsh) * Nh**3/(Nh - 1)/sum(Nh * ph.offsh)**2)
# Right-hand side: finite population correction plus the squared bounds
a <- as.vector(A\%*\%(1 / Nh) + c(0.0466, 0.05, 0.03, 0.03)**2)

# We do not consider any additional sample size or cost constraints
C <- NULL # no cost constraint
c <- NULL # no cost constraint

# Since we minimize the sample size, we define D and d as follows:
# (unit cost per stratum, so the objective is the overall sample size)
D <- matrix(1, nrow = 1, ncol = length(Nh)) # objective cost components
d <- as.vector(0)                           # vector of possible fixed cost

opts = list(sense = "min_cost", # Sense of optimization is survey cost
            f = NULL,
            df = NULL,
            Hf = NULL,
            init_w = 1,
            mc_cores = 1L, pm_tol = 1e-05,
            max_iters = 100L, print_pm = TRUE)

sol <- mosalloc(D = D, d = d, A = A, a = a, l = l, u = u, opts = opts)

# The objective value equals the overall sample size
sum(sol$n) # [1] 2843.219
sol$J # [1] 2843.219

#----------------------------------------------------------------------------
#----------------------------------------------------------------------------
# Note: Sample size optimization for two-stage cluster sampling can be
#       reduced to the structure of optimal stratified random sampling when
#       considering expected costs. Therefore, mosalloc() can handle such
#       designs. A benefit is that mosalloc() allows relatively complex
#       sample size restrictions such as box constraints for subsampling.
#       Optimal sample sizes at secondary stages have to be reconstructed
#       from sol$n.
#
# Example 9: Optimal number of primary sampling units (PSU) and secondary
# sampling units (SSU) in 2-stage cluster sampling.

# Simulated population of 100 units randomly assigned to 4 clusters
set.seed(1234)
pop <- data.frame(value = rnorm(100, 100, 35),
                  cluster = sample(1:4, 100, replace = TRUE))

CI <- 36  # Sampling cost per PSU/cluster
CII <- 10 # Average sampling cost per SSU

NI <- 4                   # Number of PSUs/clusters
NII <- table(pop$cluster) # PSU/cluster sizes

S2I <- var(by(pop$value, pop$cluster, sum)) # between cluster variance
S2II <- by(pop$value, pop$cluster, var)     # within cluster variances

# Objective row: first entry for the PSU stage, then one entry per cluster
D <- matrix(c(NI**2 * S2I - NI * sum(NII * S2II), NI * NII**2 * S2II), 1)
d <- as.vector(NI * S2I)

# Rows of C: one expected cost row, then four rows with the coefficient 2
# and four rows with the negated cluster sizes in the first column; these
# eight rows link the first entry of n to the remaining entries
C <- cbind(c(CI, rep(2, NI), -NII),
           rbind(rep(CII / NI, 4), -diag(4), diag(4)))
c <- as.vector(c(500, rep(0, 8))) # maximum expected cost of 500

# Box constraints for the vector n (first entry: number of clusters)
l <- c(2, rep(4, 4))
u <- c(NI, NI * NII)

opts = list(sense = "max_precision",
            f = NULL,
            df = NULL,
            Hf = NULL,
            init_w = 1,
            mc_cores = 1L, pm_tol = 1e-05,
            max_iters = 100L, print_pm = TRUE)

sol <- mosalloc(D = D, d = d, C = C, c = c, l = l, u = u, opts = opts)

# Optimum number of clusters to be drawn
sol$n[1] # [1] 2.991551

# Optimum number of elements to be drawn within clusters
sol$n[-1] / sol$n[1] # [1] 12.16454 11.60828 15.87949 12.80266

}
\references{
See:

Folks, J.L., Antle, C.E. (1965). \emph{Optimum Allocation of Sampling Units to
Strata when there are R Responses of Interest}. Journal of the American
Statistical Association, 60(309), 225-233.
\doi{10.1080/01621459.1965.10480786}.

Münnich, R., Sachs, E., Wagner, M. (2012). \emph{Numerical solution of optimal
allocation problems in stratified sampling under box constraints}.
AStA Advances in Statistical Analysis, 96, 435-450.
\doi{10.1007/s10182-011-0176-z}.

Neyman, J. (1934). \emph{On the Two Different Aspects of the Representative
Method: The Method of Stratified Sampling and the Method of Purposive
Selection}. Journal of the Royal Statistical Society, 97(4), 558--625.

Tschuprow, A.A. (1923). \emph{On the Mathematical Expectation of the Moments of
Frequency Distribution in the Case of Correlated Observations}. Metron,
2(3,4), 461-493, 646-683.

Rupp, M. (2018). \emph{Optimization for Multivariate and Multi-domain Methods
in Survey Statistics} (Doctoral dissertation). Trier University.
\doi{10.25353/UBTR-8351-5432-14XX}.

Srikantan, K.S. (1963). \emph{A Problem in Optimum Allocation}.
Operations Research, 11(2), 265-274.

Willems, F. (2025). \emph{A Framework for Multiobjective and Uncertain Resource
Allocation Problems in Survey Sampling based on Conic Optimization}
(Doctoral dissertation). Trier University.
\doi{10.25353/ubtr-9200-484c-5c89}.
}
