% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dfm_weight.R
\name{dfm_tfidf}
\alias{dfm_tfidf}
\title{Weight a dfm by \emph{tf-idf}}
\usage{
dfm_tfidf(x, scheme_tf = "count", scheme_df = "inverse", base = 10,
  ...)
}
\arguments{
\item{x}{object for which idf or tf-idf will be computed (a document-feature 
matrix)}

\item{scheme_tf}{scheme for \code{\link{dfm_weight}}; defaults to \code{"count"}}

\item{scheme_df}{scheme for \code{\link{docfreq}}; defaults to
\code{"inverse"}.  Other options to \code{\link{docfreq}} can be passed
through the ellipsis (\code{...}).}

\item{base}{the base for the logarithms in the \code{\link{dfm_weight}} and
\code{\link{docfreq}} calls; default is 10}

\item{...}{additional arguments passed to \code{\link{docfreq}}.}
}
\description{
Weight a dfm by term frequency-inverse document frequency (\emph{tf-idf}), 
with full control over options.  Uses fully sparse methods for efficiency.
}
\details{
\code{dfm_tfidf} computes term frequency-inverse document frequency
  weighting.  The default is to use counts instead of normalized term
  frequency (the relative term frequency within document), but this
  can be overridden using \code{scheme_tf = "prop"}.
}
\examples{
# compare raw counts, tf-idf weights and document frequencies on the
# built-in example dfm
lbg_dfm <- as.dfm(data_dfm_lbgexample)
feat_cols <- 5:10
head(lbg_dfm[, feat_cols])
head(dfm_tfidf(lbg_dfm)[, feat_cols])
docfreq(lbg_dfm)[5:15]
# dfm_weight() called with its default settings, for comparison
head(dfm_weight(lbg_dfm)[, feat_cols])

# replication of worked example from
# https://en.wikipedia.org/wiki/Tf-idf#Example_of_tf.E2.80.93idf
# one row of counts per document, one column per feature
wiki_counts <- matrix(c(1, 1, 2, 1, 0, 0,
                        1, 1, 0, 0, 2, 3),
                      nrow = 2, byrow = TRUE,
                      dimnames = list(docs = c("document1", "document2"),
                                      features = c("this", "is", "a", "sample",
                                                   "another", "example")))
wiki_dfm <- as.dfm(wiki_counts)
wiki_dfm
docfreq(wiki_dfm)
# proportional term frequencies, rounded to two digits for display
round(dfm_tfidf(wiki_dfm, scheme_tf = "prop"), digits = 2)

\dontrun{
# comparison with tm
# requireNamespace() only loads tm without attaching it, so weightTfIdf()
# has to be called through the tm:: prefix
if (requireNamespace("tm")) {
    convert(wiki_dfm, to = "tm") \%>\% tm::weightTfIdf() \%>\% as.matrix()
    # same as:
    dfm_tfidf(wiki_dfm, base = 2, scheme_tf = "prop")
}
}
}
\references{
Manning, C. D., Raghavan, P., & Schütze, H. (2008). 
  \emph{Introduction to Information Retrieval}. Cambridge University Press.
}
\seealso{
\code{\link{dfm_weight}}, \code{\link{docfreq}}
}
\keyword{dfm}
\keyword{weighting}
