% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/refmodel.R
\name{init_refmodel}
\alias{init_refmodel}
\title{Custom reference model initialization}
\usage{
init_refmodel(
  z,
  y,
  family,
  x = NULL,
  predfun = NULL,
  dis = NULL,
  offset = NULL,
  wobs = NULL,
  wsample = NULL,
  intercept = TRUE,
  cvfun = NULL,
  cvfits = NULL,
  ...
)
}
\arguments{
\item{z}{Predictor matrix of dimension \code{n}-by-\code{dz} containing the training
features for the reference model. Rows denote the observations and columns the different features.}

\item{y}{Vector of length \code{n} giving the target variable values.}

\item{family}{\link{family} object giving the model family.}

\item{x}{Predictor matrix of dimension \code{n}-by-\code{dx} containing the candidate
features for selection (i.e. variables from which to select the submodel).  Rows denote
the observations and columns the different features. Notice that this can be
different from \code{z}. If missing, same as \code{z} by default.}

\item{predfun}{Function that takes a \code{nt}-by-\code{dz} test predictor matrix \code{zt} as an input
(\code{nt} = # test points, \code{dz} = number of features in the reference model) and outputs
a \code{nt}-by-\code{S} matrix of expected values for the target variable \code{y},
each column corresponding to one posterior draw for the parameters in the reference model
(the number of draws \code{S} can also be 1). Notice that the output should be computed without
any offsets; these are automatically taken into account internally, e.g. in cross-validation.
If omitted, then the returned object will be a 'data reference', that is, it can be used to compute
penalized maximum likelihood solutions such as Lasso (see examples below and in the quickstart vignette).}

\item{dis}{Vector of length \code{S} giving the posterior draws for the dispersion parameter
in the reference model if there is such a parameter in the model family. For a Gaussian
observation model this is the noise standard deviation \code{sigma}.}

\item{offset}{Offset to be added to the linear predictor in the projection. (Same as in
function \code{glm}.)}

\item{wobs}{Observation weights. If omitted, equal weights are assumed.}

\item{wsample}{Vector of length \code{S} giving the weights for the posterior draws.
If omitted, equal weights are assumed.}

\item{intercept}{Whether to use intercept. Default is \code{TRUE}.}

\item{cvfun}{Function for performing K-fold cross-validation. The input is an \code{n}-element
vector where each value is an integer between 1 and K denoting the fold for each observation.
Should return a list with K elements, each of which is a list with fields \code{predfun} and
\code{dis} (if the model has a dispersion parameter) which are defined the same way as the arguments 
\code{predfun} and \code{dis} above but are computed using only the corresponding subset of the data. 
More precisely, if \code{cvres} denotes
the list returned by \code{cvfun}, then \code{cvres[[k]]$predfun} and \code{cvres[[k]]$dis} must be computed
using only data from indices \code{folds != k}, where \code{folds} is the \code{n}-element input for
\code{cvfun}. Can be omitted but either \code{cvfun} or \code{cvfits} is needed for K-fold cross-validation
for genuine reference models. See example below.}

\item{cvfits}{A list with K elements, that has the same format as the value returned by \code{cvfun} but
each element of \code{cvfits} must also contain a field \code{omitted} which indicates the indices that
were left out for the corresponding fold. Usually it is easier to specify \code{cvfun} but this can be useful
if you have already computed the cross-validation for the reference model and would like to avoid 
recomputing it. Can be omitted but either \code{cvfun} or \code{cvfits} is needed for K-fold cross-validation
for genuine reference models.}

\item{...}{Currently ignored.}
}
\value{
An object that can be passed to all the functions that
take the reference fit as the first argument, such as \link{varsel}, \link{cv_varsel},
\link[=proj-pred]{proj_predict} and \link[=proj-pred]{proj_linpred}.
}
\description{
Initializes a structure that can be used as a reference fit for the
projective variable selection. This function is provided to allow construction 
of the reference fit from arbitrary fitted models, because only limited
information is needed for the actual projection and variable selection.
}
\examples{
\donttest{

# generate some toy data: n observations of d features
set.seed(1)
n <- 100
d <- 10
x <- matrix(rnorm(n*d), nrow=n, ncol=d)
b <- c(c(1,1),rep(0,d-2)) # true coefficients: first two variables are relevant
y <- x \%*\% b + rnorm(n)

# The reference model below is fitted with rstanarm. The guard skips this
# part instead of failing when rstanarm is not installed, and the call is
# namespace-qualified so that rstanarm does not have to be attached.
if (requireNamespace('rstanarm', quietly=TRUE)) {

  # fit the model (this uses rstanarm for posterior inference,
  # but any other tool could also be used)
  fit <- rstanarm::stan_glm(y~x, family=gaussian(), data=data.frame(x=I(x),y=y))

  # extract the posterior draws (one row per draw)
  draws <- as.matrix(fit)
  a <- draws[,1] # intercept
  # regression coefficients (note: this overwrites the true coefficients b
  # used above to simulate the data)
  b <- draws[,2:(ncol(draws)-1)]
  sigma <- draws[,ncol(draws)] # noise std

  # initialize the reference model structure; predfun maps a test predictor
  # matrix xt to a matrix of expected values with one column per draw
  predfun <- function(xt) t( b \%*\% t(xt) + a )
  ref <- init_refmodel(x,y, gaussian(), predfun=predfun, dis=sigma)

  # variable selection based on the reference model
  vs <- cv_varsel(ref)
  varsel_plot(vs)

}


# pass in the original data as 'reference'; this allows us to compute
# traditional estimates like Lasso (no fitted reference model is needed here)
dref <- init_refmodel(x,y,gaussian())
lasso <- cv_varsel(dref, method='l1') # lasso
varsel_plot(lasso, stat='rmse')

}

}
