% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/factorial_preprocessing.R
\name{factorial_preprocessing}
\alias{factorial_preprocessing}
\title{A function to perform factorial preprocessing of a corpus of texts
into quanteda document-feature matrices.}
\usage{
factorial_preprocessing(text, use_ngrams = TRUE,
  infrequent_term_threshold = 0.01, parallel = FALSE, cores = 1,
  intermediate_directory = NULL, parameterization_range = NULL,
  return_results = TRUE, verbose = TRUE)
}
\arguments{
\item{text}{A vector of strings (one per document) or quanteda corpus object
from which we wish to form a document-term matrix.}

\item{use_ngrams}{Option to extract 1-, 2-, and 3-grams from the text as
another potential preprocessing step. Defaults to TRUE.}

\item{infrequent_term_threshold}{A proportion threshold at which infrequent
terms are to be filtered. Defaults to 0.01 (terms that appear in fewer than
1 percent of documents are removed).}

\item{parallel}{Logical indicating whether factorial preprocessing should be
performed in parallel. Defaults to FALSE.}

\item{cores}{The number of cores to use. Defaults to 1; can be set to any
number less than or equal to the number of cores on one's computer.}

\item{intermediate_directory}{Optional path to a directory where each dfm
will be saved as an intermediate step. The file names will follow the
convention intermediate_dfm_i.Rdata, where i is the index of the combination
of preprocessing choices. The function will then attempt to read all of the
dfms back into a list if return_results = TRUE (the default), or simply end
the function call if return_results = FALSE. This can be a useful option if
the user is preprocessing a corpus that would produce a dfm list that is
impractical to work with due to its size.}

\item{parameterization_range}{Defaults to NULL, but can be set to a numeric
vector of indexes relating to preprocessing decisions. This can be used to
restart large analyses after power failure.}

\item{return_results}{Defaults to TRUE; can be set to FALSE to prevent an
overly large dfm list from being created.}

\item{verbose}{Logical indicating whether more information should be printed
to the screen to let the user know about progress in preprocessing. Defaults
to TRUE.}
}
\value{
A list object containing permutations of the document-term matrix.
}
\description{
Preprocesses a corpus of texts into a document-feature matrix
in 128 different ways.
}
\examples{
\dontrun{
# load the package
library(preText)
# load in the data
data("UK_Manifestos")
# preprocess data
preprocessed_documents <- factorial_preprocessing(
    UK_Manifestos,
    use_ngrams = TRUE,
    infrequent_term_threshold = 0.02,
    verbose = TRUE)
}
}

