% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/slide_imp.R
\name{slide_imp}
\alias{slide_imp}
\title{Sliding Window K-NN or PCA Imputation}
\usage{
slide_imp(
  obj,
  n_feat,
  n_overlap,
  k = NULL,
  colmax = 0.9,
  knn_method = c("euclidean", "manhattan"),
  cores = 1,
  post_imp = TRUE,
  dist_pow = 0,
  subset = NULL,
  ncp = NULL,
  scale = TRUE,
  pca_method = c("regularized", "EM"),
  coeff.ridge = 1,
  seed = NULL,
  row.w = NULL,
  nb.init = 1,
  maxiter = 1000,
  miniter = 5,
  .progress = TRUE
)
}
\arguments{
\item{obj}{A numeric matrix with \strong{samples in rows} and \strong{features in columns}.}

\item{n_feat}{Number of features in a window.}

\item{n_overlap}{Number of overlapping features between two windows.}

\item{k}{Number of nearest neighbors for imputation. 10 is a good starting point.}

\item{colmax}{A number from 0 to 1. Threshold of missing data above which K-NN imputation is skipped.}

\item{knn_method}{Either "euclidean" (default) or "manhattan". Distance metric for nearest neighbor calculation.}

\item{cores}{Number of cores to parallelize over.}

\item{post_imp}{Whether to impute remaining missing values (those that failed K-NN imputation)
using column means (default = \code{TRUE}).}

\item{dist_pow}{The amount of penalization for further away nearest neighbors in the weighted average.
\code{dist_pow = 0} (default) is the simple average of the nearest neighbors.}

\item{subset}{Character vector of column names or integer vector of column
indices specifying which columns to impute.}

\item{ncp}{integer corresponding to the number of components used to predict the missing entries}

\item{scale}{boolean. If \code{TRUE} (default), each variable is given the same weight.}

\item{pca_method}{Either "regularized" (default) or "EM". Method used for PCA imputation.}

\item{coeff.ridge}{1 by default to perform the regularized pca_imp (imputePCA) algorithm; useful only if \code{pca_method = "regularized"}. Other regularization terms can be implemented by setting the value to less than 1 in order to regularize less (to get closer to the results of the EM method) or more than 1 to regularize more (to get closer to the results of the mean imputation)}

\item{seed}{integer, by default \code{seed = NULL} implies that missing values are initially imputed by the mean of each variable. Other values lead to a random initialization}

\item{row.w}{Row weights. Can be one of:
\itemize{
\item \code{NULL} (default): all rows weighted equally.
\item A numeric vector of length \code{nrow(obj)}: custom positive weights.
\item \code{"n_miss"}: rows with more missing values receive lower weight.
}

Weights are normalized to sum to 1.}

\item{nb.init}{integer corresponding to the number of random initializations; the first initialization is the initialization with the mean imputation}

\item{maxiter}{integer, maximum number of iterations for the algorithm}

\item{miniter}{integer, minimum number of iterations for the algorithm}

\item{.progress}{Show progress bar (default = \code{TRUE}).}
}
\value{
A numeric matrix of the same dimensions as \code{obj} with missing values imputed.
}
\description{
Performs sliding window K-NN or PCA imputation of large numeric matrices column-wise.

This method assumes that columns are meaningfully sorted.
}
\details{
The sliding window approach divides the input matrix into smaller, overlapping
segments and applies imputation to each window independently. Values in overlapping
areas are averaged across windows to produce the final imputed result.
This approach assumes that features (columns) are sorted meaningfully (e.g.,
by genomic position, time, etc.).

Specify \code{k} and related arguments to use K-NN, \code{ncp} and related arguments for PCA.
}
\examples{
# Simulate a matrix with missing values: 20 samples (rows) by 100 features
# (columns), where the column order is meaningful (e.g., by genomic position)
set.seed(1234)
beta_matrix <- t(sim_mat(100, 20)$input)

# Sliding window K-NN imputation is selected by specifying `k`
imputed_knn <- slide_imp(
  beta_matrix,
  k = 5,
  n_feat = 50,
  n_overlap = 10,
  scale = FALSE # This argument belongs to PCA imputation and will be ignored
)
imputed_knn

# Sliding window PCA imputation is selected by specifying `ncp`
imputed_pca <- slide_imp(
  beta_matrix,
  ncp = 2,
  n_feat = 50,
  n_overlap = 10
)
imputed_pca

}
