% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sar.R
\name{simsar}
\alias{simsar}
\title{Simulating data from linear-in-mean models with social interactions}
\usage{
simsar(formula, Glist, theta, cinfo = TRUE, data)
}
\arguments{
\item{formula}{an object of class \link[stats]{formula}: a symbolic description of the model. \code{formula} must be of the form, for example, \code{y ~ x1 + x2 + gx1 + gx2},
where \code{y} is the endogenous vector and \code{x1}, \code{x2}, \code{gx1} and \code{gx2} are control variables, which can include contextual variables, i.e. averages among the peers.
Peer averages can be computed using the function \code{\link{peer.avg}}.}

\item{Glist}{The network matrix. For networks consisting of multiple subnets, \code{Glist} can be a list of subnets with the \code{m}-th element being an \code{ns*ns} adjacency matrix, where \code{ns} is the number of nodes in the \code{m}-th subnet.}

\item{theta}{a vector defining the true value of \eqn{\theta = (\lambda, \Gamma, \sigma)} (see the model specification in the Details section).}

\item{cinfo}{a Boolean indicating whether information is complete (\code{cinfo = TRUE}) or incomplete (\code{cinfo = FALSE}). In the case of incomplete information, the model is defined under rational expectations.}

\item{data}{an optional data frame, list or environment (or object coercible by \link[base]{as.data.frame} to a data frame) containing the variables
in the model. If not found in \code{data}, the variables are taken from \code{environment(formula)}, typically the environment from which \code{simsar} is called.}
}
\value{
A list consisting of:
\item{y}{the simulated outcome (a continuous variable).}
\item{Gy}{the average of \code{y} among peers.}
}
\description{
\code{simsar} simulates continuous variables with social interactions (see Lee, 2004 and Lee et al., 2010).
}
\details{
For a complete information model, the outcome \eqn{y_i} is defined as:
\deqn{y_i = \lambda \bar{y}_i + \mathbf{z}_i'\Gamma + \epsilon_i,}
where \eqn{\bar{y}_i} is the average of \eqn{y} among peers,
\eqn{\mathbf{z}_i} is a vector of control variables,
and \eqn{\epsilon_i \sim N(0, \sigma^2)}.
In the case of incomplete information models with rational expectations, \eqn{y_i} is defined as:
\deqn{y_i = \lambda E(\bar{y}_i) + \mathbf{z}_i'\Gamma + \epsilon_i.}
}
\examples{
\donttest{
# Group sizes
set.seed(123)
M    <- 5 # Number of sub-groups
nvec <- round(runif(M, 100, 1000))
n    <- sum(nvec)

# True parameters, stacked as theta = (lambda, Gamma, sigma)
lambda <- 0.4
Gamma  <- c(2, -1.9, 0.8, 1.5, -1.2)
sigma  <- 1.5
theta  <- c(lambda, Gamma, sigma)

# Exogenous variables
X <- cbind(rnorm(n, 1, 1), rexp(n, 0.4))

# Network: one row-normalized adjacency matrix per sub-group
max_d <- 30 # Largest number of peers a node can nominate
G     <- lapply(nvec, function(size) {
  adj <- matrix(0, size, size)
  for (i in seq_len(size)) {
    # Draw the number of peers first, then the peers (never node i itself)
    n_peers       <- sample(0:max_d, 1)
    peers         <- sample((1:size)[-i], n_peers)
    adj[i, peers] <- 1
  }
  deg <- rowSums(adj)
  # Rows without any peer are divided by 1, so they stay equal to zero
  adj / replace(deg, deg == 0, 1)
})

# Data: the covariates followed by their peer averages
data        <- data.frame(X, peer.avg(G, cbind(x1 = X[, 1], x2 = X[, 2])))
names(data) <- c("x1", "x2", "gx1", "gx2")

# Simulate the outcome and extract it
sim <- simsar(formula = ~ x1 + x2 + gx1 + gx2, Glist = G,
              theta = theta, data = data)
y   <- sim$y
}
}
\references{
Lee, L. F. (2004). Asymptotic distributions of quasi-maximum likelihood estimators for spatial autoregressive models. \emph{Econometrica}, 72(6), 1899-1925, \doi{10.1111/j.1468-0262.2004.00558.x}.

Lee, L. F., Liu, X., & Lin, X. (2010). Specification and estimation of social interaction models with network structures. \emph{The Econometrics Journal}, 13(2), 145-176, \doi{10.1111/j.1368-423X.2010.00310.x}.
}
\seealso{
\code{\link{sar}}, \code{\link{simsart}}, \code{\link{simcdnet}}.
}
