% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/simSingleCell.R
\name{simSCProfiles}
\alias{simSCProfiles}
\title{Simulate new single-cell RNA-Seq expression profiles using the ZINB-WaVE
model parameters}
\usage{
simSCProfiles(
  object,
  cell.ID.column,
  cell.type.column,
  n.cells,
  suffix.names = "_Simul",
  cell.types = NULL,
  file.backend = NULL,
  name.dataset.backend = NULL,
  compression.level = NULL,
  block.processing = FALSE,
  block.size = 1000,
  chunk.dims = NULL,
  verbose = TRUE
)
}
\arguments{
\item{object}{\code{\linkS4class{DigitalDLSorter}} object with
\code{single.cell.real} and \code{zinb.params} slots.}

\item{cell.ID.column}{Name or column number corresponding to the cell names
of expression matrix in cells metadata.}

\item{cell.type.column}{Name or column number corresponding to the cell type
of each cell in cells metadata.}

\item{n.cells}{Number of simulated cells generated per cell type (e.g., if
you have 10 different cell types in your dataset and \code{n.cells = 100},
then 1000 cell profiles will be simulated).}

\item{suffix.names}{Suffix used on simulated cells. This suffix must be
unique in the simulated cells, so make sure that this suffix does not
appear in the real cell names.}

\item{cell.types}{Vector indicating the cell types to simulate. If
\code{NULL} (by default), \code{n.cells} single-cell profiles for all cell
types will be simulated.}

\item{file.backend}{Valid file path to store the simulated single-cell
expression profiles as an HDF5 file (\code{NULL} by default). If provided,
the data is stored in HDF5 files used as back-end by using the
\pkg{DelayedArray}, \pkg{HDF5Array} and \pkg{rhdf5} packages instead of
loading all data into RAM. This is suitable for situations where you
have large amounts of data that cannot be loaded into memory. Note that
operations on this data will be performed in blocks (i.e., subsets of
determined size), which may result in longer execution times.}

\item{name.dataset.backend}{Name of the dataset in HDF5 file to be used. Note
that a dataset with this name must not already exist in the file. If
\code{NULL} (by default), a random dataset name will be used.}

\item{compression.level}{The compression level used if \code{file.backend} is
provided. It is an integer value between 0 (no compression) and 9 (highest
and slowest compression). See
\code{?\link[HDF5Array]{getHDF5DumpCompressionLevel}} from the
\pkg{HDF5Array} package for more information.}

\item{block.processing}{Boolean indicating whether the data should be
simulated in blocks (only if \code{file.backend} is used, \code{FALSE} by
default). This functionality is suitable for cases where it is not possible
to load all data into memory, at the cost of longer execution times.}

\item{block.size}{Only if \code{block.processing = TRUE}. Number of
single-cell expression profiles that will be simulated in each iteration
during the process. Larger numbers result in higher memory usage but
shorter execution times. Set according to available computational resources
(1000 by default). Note that it cannot be greater than the total number of
simulated cells.}

\item{chunk.dims}{Specifies the dimensions that each HDF5 chunk will have. If
\code{NULL}, the default value is a vector of two items: the number of
genes considered by the ZINB-WaVE model during the simulation and a single
sample in order to reduce read times in the following steps. A larger
number of columns written in each chunk can lead to longer read times in
subsequent steps. Note that it cannot be greater than the dimensions of the
simulated matrix.}

\item{verbose}{Show informative messages during the execution (\code{TRUE} by
default).}
}
\value{
A \code{\linkS4class{DigitalDLSorter}} object with
\code{single.cell.simul} slot containing a
\code{\linkS4class{SingleCellExperiment}} object with the simulated
single-cell expression profiles.
}
\description{
Simulate single-cell expression profiles by randomly sampling from a negative
binomial distribution and inserting dropouts by sampling from a binomial
distribution using the ZINB-WaVE parameters estimated by the
\code{\link{estimateZinbwaveParams}} function.
}
\details{
Before this step, see \code{?\link{estimateZinbwaveParams}}. As described in
Torroja and Sanchez-Cabo, 2019, this function simulates a given number of
transcriptional profiles for each cell type provided by randomly sampling
from a negative binomial distribution with \eqn{\mu} and \eqn{\theta}
estimated parameters and inserting dropouts by sampling from a binomial
distribution with probability \eqn{\pi}. All parameters are estimated from
single-cell real data using the \code{\link{estimateZinbwaveParams}}
function. It uses the ZINB-WaVE model (Risso et al., 2018). For more details
about the model, see \code{?\link{estimateZinbwaveParams}} and Risso et al.,
2018.

The \code{file.backend} argument allows creating an HDF5 file with simulated
single-cell profiles to be used as back-end to work with data stored on disk
instead of loaded into RAM. If the \code{file.backend} argument is used with
\code{block.processing = FALSE}, all the single-cell profiles will be
simulated in one step and, therefore, loaded into RAM. Then, data
will be written to the HDF5 file. To avoid exhausting RAM if too many
single-cell profiles are simulated, single-cell profiles can be simulated and
written to HDF5 files in blocks of \code{block.size} size by setting
\code{block.processing = TRUE}.
}
\examples{
set.seed(123) # make the random counts and cell type labels reproducible
# Toy single-cell dataset: 15 genes x 10 cells annotated with two cell types
sce <- SingleCellExperiment::SingleCellExperiment(
  assays = list(
    counts = matrix(
      # one Poisson draw per matrix entry (15 x 10 = 150): drawing fewer
      # values would be silently recycled by matrix(), yielding duplicated
      # cell profiles
      rpois(15 * 10, lambda = 5), nrow = 15, ncol = 10,
      dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(10)))
    )
  ),
  colData = data.frame(
    Cell_ID = paste0("RHC", seq(10)),
    Cell_Type = sample(x = paste0("CellType", seq(2)), size = 10,
                       replace = TRUE)
  ),
  rowData = data.frame(
    Gene_ID = paste0("Gene", seq(15))
  )
)
# Load the real single-cell profiles into a DigitalDLSorter object
DDLS <- loadSCProfiles(
  single.cell.data = sce,
  cell.ID.column = "Cell_ID",
  gene.ID.column = "Gene_ID"
)
# Estimate the ZINB-WaVE parameters required by simSCProfiles
DDLS <- estimateZinbwaveParams(
  object = DDLS,
  cell.type.column = "Cell_Type",
  cell.ID.column = "Cell_ID",
  gene.ID.column = "Gene_ID",
  subset.cells = 4,
  verbose = FALSE
)
# Simulate 2 new single-cell profiles per cell type
DDLS <- simSCProfiles(
  object = DDLS,
  cell.ID.column = "Cell_ID",
  cell.type.column = "Cell_Type",
  n.cells = 2,
  verbose = TRUE
)

}
\references{
Risso, D., Perraudeau, F., Gribkova, S. et al. (2018). A general
and flexible method for signal extraction from single-cell RNA-seq data.
Nat Commun 9, 284. doi: \doi{10.1038/s41467-017-02554-5}.

Torroja, C. and Sánchez-Cabo, F. (2019). digitalDLSorter: A Deep Learning
algorithm to quantify immune cell populations based on scRNA-Seq data.
Frontiers in Genetics 10, 978. doi: \doi{10.3389/fgene.2019.00978}.
}
\seealso{
\code{\link{estimateZinbwaveParams}}
}
