% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/FormatRawLdaOutput.R
\name{FormatRawLdaOutput}
\alias{FormatRawLdaOutput}
\title{Format Raw Output from \code{\link[lda]{lda.collapsed.gibbs.sampler}}}
\usage{
FormatRawLdaOutput(lda_result, docnames, smooth = TRUE)
}
\arguments{
\item{lda_result}{The list value returned by \code{\link[lda]{lda.collapsed.gibbs.sampler}}}

\item{docnames}{A character vector giving the names of documents. This is generally \code{rownames(dtm)}.}

\item{smooth}{Logical. Do you want to smooth your topic proportions so that 
there is a positive value for each term in each topic? Defaults to \code{TRUE}.}
}
\value{
Returns a \code{list} with two elements: \code{phi}, whose rows represent the 
distribution of words across a topic, and \code{theta}, whose rows represent 
the distribution of topics across a document.
}
\description{
Extracts outputs from an LDA model estimated with the \code{lda} package by Jonathan Chang.
}
\examples{
# Load a pre-formatted dtm
data(nih_sample_dtm)

# Get a random sample of 20 documents
keep_rows <- sample(seq_len(nrow(nih_sample_dtm)), 20)
dtm <- nih_sample_dtm[ keep_rows , ]

# Re-create a character vector of documents from the DTM
docs <- Dtm2Docs(dtm)

# Format the documents for input to lda::lda.collapsed.gibbs.sampler
vocab <- colnames(dtm)
lex <- lda::lexicalize(docs, vocab = vocab)

# Fit a 5-topic model with lda::lda.collapsed.gibbs.sampler
raw_fit <- lda::lda.collapsed.gibbs.sampler(documents = lex,
                                            K = 5,
                                            vocab = vocab,
                                            num.iterations = 200,
                                            alpha = 0.1,
                                            eta = 0.05)

# Format the raw sampler output to get phi and theta matrices
lda <- FormatRawLdaOutput(lda_result = raw_fit,
                          docnames = rownames(dtm),
                          smooth = TRUE)

}

