% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/kernel_sievePHaipw.R
\name{kernel_sievePHaipw}
\alias{kernel_sievePHaipw}
\title{Nonparametric Kernel-Smoothed Stratified Mark-Specific Proportional Hazards
Model with a Univariate Continuous Mark, Missing-at-Random in Some Failures}
\usage{
kernel_sievePHaipw(
  eventTime,
  eventInd,
  mark,
  tx,
  aux = NULL,
  auxType = NULL,
  zcov = NULL,
  strata = NULL,
  formulaPH = ~tx,
  formulaMiss = NULL,
  formulaAux = NULL,
  tau = NULL,
  tband = NULL,
  hband = NULL,
  nvgrid = 100,
  a = NULL,
  b = NULL,
  ntgrid = NULL,
  nboot = 500,
  seed = NULL,
  maxit = 6
)
}
\arguments{
\item{eventTime}{a numeric vector specifying the observed right-censored
event time.}

\item{eventInd}{a numeric vector indicating the event of interest (1 if
event, 0 if right-censored).}

\item{mark}{a numeric vector specifying a univariate continuous mark subject
to missingness at random. Missing mark values should be set to \code{NA}.
For subjects with \code{eventInd = 0}, the value in \code{mark} should also
be set to \code{NA}.}

\item{tx}{a numeric vector indicating the treatment group (1 if treatment, 0
if placebo).}

\item{aux}{a numeric vector specifying a binary or a continuous auxiliary
covariate which may be potentially useful for predicting missingness, i.e.,
the probability of missing, and for informing about the distribution of
missing marks. The mark missingness model only requires that the auxiliary
covariates be observed in subjects who experienced the event of interest.
For subjects with \code{eventInd = 0}, the value in \code{aux} may be set
to \code{NA}. If no auxiliary covariate is used, set \code{aux} to the
default of \code{NULL}.}

\item{auxType}{a character string describing the data type of \code{aux} if
\code{aux} is used. Data types allowed include "binary" and "continuous".
If \code{aux} is not used, \code{auxType} should be set to the default of
\code{NULL}.}

\item{zcov}{a data frame with one row per subject specifying possibly
time-dependent covariate(s) (not including \code{tx}). If no covariate is
used, \code{zcov} should be set to the default of \code{NULL}.}

\item{strata}{a numeric vector specifying baseline strata (\code{NULL} by
default). If specified, a separate mark-specific baseline hazard is assumed
for each stratum. It also allows the model for the probability of observing
the mark (complete case) and the model for the mark distribution to differ
across strata.}

\item{formulaPH}{a one-sided formula object (on the right side of the
\code{~} operator) specifying the linear predictor in the proportional
hazards model. Available variables to be used in the formula include
\code{tx} and variable(s) in \code{zcov}. By default, \code{formulaPH} is
specified as \code{~ tx}.}

\item{formulaMiss}{a one-sided formula object (on the right side of the
\code{~} operator) specifying the linear predictor in the logistic
regression model used for predicting the probability of observing the mark.
\code{formulaMiss} must be provided for the \code{AIPW} method. Available
variables to be used in the formula include \code{eventTime}, \code{tx},
\code{aux}, and variable(s) in \code{zcov}.}

\item{formulaAux}{a one-sided formula object (on the right side of the
\code{~} operator) specifying the variables used for estimating the
conditional distribution of \code{aux}. If \code{aux} is binary, the
formula specifies the linear predictor in a logistic regression and if
\code{aux} is continuous, the formula provides a symbolic description of
variables used in kernel conditional density estimation. \code{formulaAux}
is optional for the \code{AIPW} estimation procedure. Available variables
to be used in the formula include \code{eventTime}, \code{tx}, \code{mark},
and variable(s) in \code{zcov}.}

\item{tau}{a numeric value specifying the duration of study follow-up period.
Failures beyond \code{tau} are treated as right-censored. There needs to be at
least \eqn{10\%} of subjects (as a rule of thumb) remaining uncensored by
\code{tau} for the estimation to be stable. By default, \code{tau} is set
as the maximum of \code{eventTime}.}

\item{tband}{a numeric value between 0 and \code{tau} specifying the
bandwidth of the kernel smoothing function over time. By default,
\code{tband} is set as (\code{tau}-min(\code{eventTime}))/5.}

\item{hband}{a numeric value between 0 and 1 specifying the bandwidth of the
kernel smoothing function over mark. By default, \code{hband} is set as
\eqn{4\sigma n^{-1/3}} where \eqn{\sigma} is the estimated standard
deviation of the observed marks for uncensored failure times and \eqn{n} is
the number of subjects in the dataset. Larger bandwidths are recommended
for higher percentages of missing marks.}

\item{nvgrid}{an integer value (100 by default) specifying the number of
equally spaced mark values between the minimum and maximum of the observed
mark for which the treatment effects are evaluated.}

\item{a}{a numeric value between the minimum and maximum of observed mark
values specifying the lower bound of the range for testing the null
hypotheses \eqn{H_{10}: HR(v) = 1} and \eqn{H_{20}: HR(v)} does not depend
on \eqn{v}, for \eqn{v \in [a, b]}. By default, \code{a} is set as
\code{(max(mark) - min(mark))/nvgrid + min(mark)}.}

\item{b}{a numeric value between the minimum and maximum of observed mark
specifying the upper bound of the range for testing the null hypotheses
\eqn{H_{10}: HR(v) = 1} and \eqn{H_{20}: HR(v)} does not depend on \eqn{v},
for \eqn{v \in [a, b]}. By default, \code{b} is set as \code{max(mark)}.}

\item{ntgrid}{an integer value (\code{NULL} by default) specifying the number
of equally spaced time points for which the mark-specific baseline hazard
functions are evaluated. If \code{NULL}, baseline hazard functions are not
evaluated.}

\item{nboot}{number of bootstrap iterations (500 by default) for simulating
the distributions of test statistics. If \code{NULL}, the hypothesis tests
are not performed.}

\item{seed}{an integer specifying the random number generation seed for
reproducing the test statistics and p-values. By default, a specific seed
is not set.}

\item{maxit}{Maximum number of iterations to attempt for convergence in
estimation. The default is 6.}
}
\value{
An object of class \code{kernel_sievePH} which can be processed by
  \code{\link{summary.kernel_sievePH}} to obtain or print a summary of the
  results. An object of class \code{kernel_sievePH} is a list containing the
  following components:
\itemize{
\item \code{H10}: a data frame with test statistics (first row) and
corresponding p-values (second row) for testing \eqn{H_{10}: HR(v) = 1} for
\eqn{v \in [a, b]}. Columns \code{TSUP1} and \code{Tint1} include test
statistics and p-values for testing \eqn{H_{10}} vs. \eqn{H_{1a}: HR(v) \neq
1} for some \eqn{v \in [a, b]} (general alternative). Columns \code{TSUP1m}
and \code{Tint1m} include test statistics and p-values for testing
\eqn{H_{10}} vs. \eqn{H_{1m}: HR(v) \leq 1} with strict inequality for some
\eqn{v \in [a, b]} (monotone alternative). \code{TSUP1} and \code{TSUP1m} are
based on extensions of the classic Kolmogorov-Smirnov supremum-based test.
\code{Tint1} and \code{Tint1m} are based on generalizations of the
integration-based Cramer-von Mises test. \code{Tint1} and \code{Tint1m}
involve integration of deviations over the whole range of the mark. If
\code{nboot} is \code{NULL}, \code{H10} is returned as \code{NULL}.

\item \code{H20}: a data frame with test statistics (first row) and
corresponding p-values (second row) for testing \eqn{H_{20}}: \eqn{HR(v)} does
not depend on \eqn{v \in [a, b]}. Columns \code{TSUP2} and \code{Tint2} include
test statistics and p-values for testing \eqn{H_{20}} vs. \eqn{H_{2a}}:
\eqn{HR(v)} depends on \eqn{v \in [a, b]} (general alternative). Columns
\code{TSUP2m} and \code{Tint2m} include test statistics and p-values for
testing \eqn{H_{20}} vs. \eqn{H_{2m}}: \eqn{HR(v)} increases as \eqn{v}
increases in \eqn{[a, b]} (monotone alternative). \code{TSUP2} and
\code{TSUP2m} are based on
extensions of the classic Kolmogorov-Smirnov supremum-based test.
\code{Tint2} and \code{Tint2m} are based on generalizations of the
integration-based Cramer-von Mises test. \code{Tint2} and \code{Tint2m}
involve integration of deviations over the whole range of the mark. If
\code{nboot} is \code{NULL}, \code{H20} is returned as \code{NULL}.

\item \code{estBeta}: a data frame summarizing point estimates and standard
errors of the mark-specific coefficients for treatment at equally-spaced
values between the minimum and the maximum of the observed mark values.

\item \code{cBproc1}: a data frame containing equally-spaced mark values in
the column \code{Mark}, test processes \eqn{Q^{(1)}(v)} for observed data in
the column \code{Observed}, and \eqn{Q^{(1)}(v)} for \code{nboot} independent
sets of normal samples in the columns S1, S2, \eqn{\cdots}. If
\code{nboot} is \code{NULL}, \code{cBproc1} is returned as \code{NULL}.

\item \code{cBproc2}: a data frame containing equally-spaced mark values in
the column \code{Mark}, test processes \eqn{Q^{(2)}(v)} for observed data in
the column \code{Observed}, and \eqn{Q^{(2)}(v)} for \code{nboot} independent
sets of normal samples in the columns S1, S2, \eqn{\cdots}. If
\code{nboot} is \code{NULL}, \code{cBproc2} is returned as \code{NULL}.

\item \code{Lambda0}: an array of dimension K x nvgrid x ntgrid for the
  kernel-smoothed baseline hazard function \eqn{\lambda_{0k}, k = 1, \dots,
  K} where \eqn{K} is the number of strata. If \code{ntgrid} is \code{NULL}
(by default), \code{Lambda0} is returned as \code{NULL}.}
}
\description{
\code{kernel_sievePHaipw} implements estimation methods of Sun and Gilbert
(2012) and hypothesis testing methods of Gilbert and Sun (2015) for a
mark-specific proportional hazards model accommodating that some failures have
a missing mark. The methods allow separate baseline mark-specific hazard
functions for different baseline subgroups. Missing marks are handled via the
augmented inverse probability weighting (AIPW) approach.
}
\details{
\code{kernel_sievePHaipw} analyzes data from a randomized
  placebo-controlled trial that evaluates treatment efficacy for a
  time-to-event endpoint with a continuous mark. The parameter of interest is
  the ratio of the conditional mark-specific hazard functions
  (treatment/placebo), which is based on a stratified mark-specific
  proportional hazards model. This model assumes no parametric form for the
  baseline hazard function nor the treatment effect across different mark
  values. For data with missing marks, the estimation procedure leverages
  auxiliary predictors of whether the mark is observed and augments the IPW
  estimator with auxiliary predictors of the missing mark value.
}
\examples{
# Simulate a small two-arm trial with a continuous mark that is missing at
# random in some failures, then fit the AIPW mark-specific PH model.

# fix the RNG seed so the simulated data set is reproducible
set.seed(20240410)
# beta enters the mark distribution of the second half of subjects (mark1);
# gamma scales the failure rate of that half through exp(gamma)
beta <- 2.1
gamma <- -1.3
n <- 200
# first n / 2 subjects have tx = 0, last n / 2 have tx = 1
tx <- rep(0:1, each = n / 2)
# exponential failure times: rate 0.2 for the first half and
# rate 0.2 * exp(gamma) for the second half
tm <- c(rexp(n / 2, 0.2), rexp(n / 2, 0.2 * exp(gamma)))
# censoring times drawn uniformly on (0, 15)
cens <- runif(n, 0, 15)
# observed time is the earliest of failure, censoring, and time 3
# (3 is also passed as tau in the model call below)
eventTime <- pmin(tm, cens, 3)
# 1 if the failure happened no later than both censoring and time 3
eventInd <- as.numeric(tm <= pmin(cens, 3))
# alpha(b) is the term shared by the mark1 formula and the TE(v) curve below
alpha <- function(b){ log((1 - exp(-2)) * (b - 2) / (2 * (exp(b - 2) - 1))) }
# marks are generated by transforming uniform draws; with these constants
# both transforms map the unit interval onto [0, 1]
mark0 <- log(1 - (1 - exp(-2)) * runif(n / 2)) / (-2)
mark1 <- log(1 + (beta - 2) * (1 - exp(-2)) * runif(n / 2) / (2 * exp(alpha(beta)))) /
  (beta - 2)
# the mark is kept only for subjects with an observed event, NA otherwise
mark <- ifelse(eventInd == 1, c(mark0, mark1), NA)
# the true TE(v) curve underlying the data-generating mechanism is:
# TE(v) = 1 - exp{alpha(beta) + beta * v + gamma}

# a binary auxiliary covariate
# drawn per subject as Bernoulli with a logistic probability in the mark;
# it is NA wherever the mark is NA
A <- sapply(exp(-0.5 - 0.2 * mark) / (1 + exp(-0.5 - 0.2 * mark)),
            function(p){ ifelse(is.na(p), NA, rbinom(1, 1, p)) })
# logistic model, in tx and A, for the probability that the mark is observed
linPred <- 1 + 0.4 * tx - 0.2 * A
probs <- exp(linPred) / (1 + exp(linPred))
# R = 1 if the mark is observed; only filled in for subjects with an event
R <- rep(NA, n)
# redraw the indicators until at least 10 failures have an observed mark
while (sum(R, na.rm = TRUE) < 10){
  R[eventInd == 1] <- sapply(probs[eventInd == 1],
                             function(p){ rbinom(1, 1, p) })
}
# a missing-at-random mark
# among failures, the mark is set to NA wherever R is not 1
mark[eventInd == 1] <- ifelse(R[eventInd == 1] == 1, mark[eventInd == 1], NA)

# AIPW estimation; auxiliary covariate is used (not required)
# small nvgrid and nboot are passed here instead of the defaults (100 and 500)
fit <- kernel_sievePHaipw(eventTime, eventInd, mark, tx, aux = A,
                          auxType = "binary", formulaMiss = ~ eventTime,
                          formulaAux = ~ eventTime + tx + mark,
                          tau = 3, tband = 0.5, hband = 0.3, nvgrid = 20,
                          nboot = 20)

}
\references{
Gilbert, P. B. and Sun, Y. (2015). Inferences on relative failure
  rates in stratified mark-specific proportional hazards models with missing
  marks, with application to human immunodeficiency virus vaccine efficacy
trials. \emph{Journal of the Royal Statistical Society Series C: Applied
Statistics}, 64(1), 49-73.

  Sun, Y. and Gilbert, P. B. (2012). Estimation of stratified mark-specific
proportional hazards models with missing marks. \emph{Scandinavian Journal of
Statistics}, 39(1), 34-52.

  Yang, G., Sun, Y., Qi, L., and Gilbert, P. B. (2017). Estimation of
  stratified mark-specific proportional hazards models under two-phase
  sampling with application to HIV vaccine efficacy trials. \emph{Statistics
  in Biosciences}, 9, 259-283.
}
