% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/textmodel_wordfish.R
\name{textmodel_wordfish}
\alias{textmodel_wordfish}
\title{Wordfish text model}
\usage{
textmodel_wordfish(x, dir = c(1, 2), priors = c(Inf, Inf, 3, 1),
  tol = c(1e-06, 1e-08), dispersion = c("poisson", "quasipoisson"),
  dispersion_level = c("feature", "overall"), dispersion_floor = 0,
  sparse = TRUE, abs_err = FALSE, svd_sparse = TRUE,
  residual_floor = 0.5)
}
\arguments{
\item{x}{the dfm on which the model will be fit}

\item{dir}{set global identification by specifying the indexes for a pair of 
documents such that \eqn{\hat{\theta}_{dir[1]} < \hat{\theta}_{dir[2]}}.}

\item{priors}{prior precisions for the estimated parameters \eqn{\alpha_i}, 
\eqn{\psi_j}, \eqn{\beta_j}, and \eqn{\theta_i}, where \eqn{i} indexes 
documents and \eqn{j} indexes features}

\item{tol}{tolerances for convergence.  The first value is a convergence 
threshold for the log-posterior of the model, the second value is the 
tolerance in the difference in parameter values from the iterative 
conditional maximum likelihood (from conditionally estimating 
document-level, then feature-level parameters).}

\item{dispersion}{sets whether the model is fit using a Poisson likelihood 
(\code{"poisson"}) or a quasi-Poisson quasi-likelihood with estimated 
dispersion (\code{"quasipoisson"}); see also \code{dispersion_level}}

\item{dispersion_level}{sets the unit level for the dispersion parameter, 
options are \code{"feature"} for term-level variances, or \code{"overall"} 
for a single dispersion parameter}

\item{dispersion_floor}{constraint for the minimal underdispersion multiplier 
in the quasi-Poisson model.  Used to minimize the distorting effect of 
terms with rare term or document frequencies that appear to be severely 
underdispersed.  Default is 0, but this only applies if \code{dispersion = 
"quasipoisson"}.}

\item{sparse}{specifies whether the \code{"dfm"} is kept sparse (\code{TRUE}) 
or coerced to dense (\code{FALSE}) before fitting}

\item{abs_err}{specifies how the convergence is considered}

\item{svd_sparse}{if \code{TRUE}, uses a singular value decomposition (SVD) 
to initialize the starting values of \eqn{\theta}; only applies when 
\code{sparse = TRUE}}

\item{residual_floor}{specifies the threshold applied to the residual matrix 
when calculating the SVD; only applies when \code{sparse = TRUE}}
}
\value{
An object of class \code{textmodel_fitted_wordfish}.  This is a list 
  containing: \item{dir}{global identification of the dimension} 
  \item{theta}{estimated document positions} \item{alpha}{estimated document 
  fixed effects} \item{beta}{estimated feature marginal effects} 
  \item{psi}{estimated word fixed effects} \item{docs}{document labels} 
  \item{features}{feature labels} \item{sigma}{regularization parameter for 
  betas in Poisson form} \item{ll}{log likelihood at convergence} 
  \item{se.theta}{standard errors for theta-hats} \item{x}{dfm to which 
  the model was fit}
}
\description{
Estimate Slapin and Proksch's (2008) "wordfish" Poisson scaling model of 
one-dimensional document positions using conditional maximum likelihood.
}
\details{
The returns match those of Will Lowe's R implementation of 
  \code{wordfish} (see the austin package), except that here we have renamed 
  \code{words} to be \code{features}.  (This return list may change.)  We 
  have also followed the practice begun with Slapin and Proksch's early 
  implementation of the model that used a regularization parameter of 
  se\eqn{(\sigma) = 3}, through the third element in \code{priors}.
}
\note{
In the rare situation where a warning message of "The algorithm did not
  converge." shows up, removing some documents may work.
}
\examples{
# fit the model to the small built-in example dfm and inspect the result
(wf <- textmodel_wordfish(data_dfm_lbgexample, dir = c(1,5)))
summary(wf, n = 10)
coef(wf)
predict(wf)
predict(wf, se.fit = TRUE)
predict(wf, interval = "confidence")

\dontrun{
# dfm used by every model fit below
ie2010dfm <- dfm(data_corpus_irishbudget2010, verbose = FALSE)
(wf1 <- textmodel_wordfish(ie2010dfm, dir = c(6,5)))
# quasi-Poisson fits with two different dispersion floors, for comparison
(wf2a <- textmodel_wordfish(ie2010dfm, dir = c(6,5), 
                             dispersion = "quasipoisson", dispersion_floor = 0))
(wf2b <- textmodel_wordfish(ie2010dfm, dir = c(6,5), 
                             dispersion = "quasipoisson", dispersion_floor = .5))
plot(wf2a$phi, wf2b$phi, xlab = "Min underdispersion = 0", ylab = "Min underdispersion = .5",
     xlim = c(0, 1.0), ylim = c(0, 1.0))
# same axes without points, so that feature labels can be drawn instead
plot(wf2a$phi, wf2b$phi, xlab = "Min underdispersion = 0", ylab = "Min underdispersion = .5",
     xlim = c(0, 1.0), ylim = c(0, 1.0), type = "n")
# integer positions of five randomly chosen features with phi below 1
underdispersedTerms <- sample(which(wf2a$phi < 1.0), 5)
which(featnames(ie2010dfm) \%in\% names(topfeatures(ie2010dfm, 20)))
text(wf2a$phi, wf2b$phi, wf2a$features, 
     cex = .8, xlim = c(0, 1.0), ylim = c(0, 1.0), col = "grey90")
# index by the sampled positions (the variable), not by a literal string
text(wf2a$phi[underdispersedTerms], wf2b$phi[underdispersedTerms], 
     wf2a$features[underdispersedTerms], 
     cex = .8, xlim = c(0, 1.0), ylim = c(0, 1.0), col = "black")
# compare the document positions with the austin implementation, if installed
if (require(austin)) {
    wf_austin <- austin::wordfish(quanteda::as.wfm(ie2010dfm), dir = c(6,5))
    cor(wf1$theta, wf_austin$theta)
}}
}
\references{
Jonathan Slapin and Sven-Oliver Proksch.  2008. "A Scaling Model 
  for Estimating Time-Series Party Positions from Texts." \emph{American 
  Journal of Political Science} 52(3):705-722.
  
  Lowe, Will and Kenneth Benoit. 2013. "Validating Estimates of Latent Traits
  from Textual Data Using Human Judgment as a Benchmark." \emph{Political Analysis}
  21(3), 298-313. \url{https://doi.org/10.1093/pan/mpt002}
}
\seealso{
\code{\link{predict.textmodel_wordfish}}
}
\author{
Benjamin Lauderdale, Haiyan Wang, and Kenneth Benoit
}
