% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lnt_read.R
\name{lnt_read}
\alias{lnt_read}
\title{Read in a LexisNexis TXT file}
\usage{
lnt_read(x, encoding = "UTF-8", extract_paragraphs = TRUE,
  convert_date = TRUE, date_format = "\%B \%d, \%Y",
  start_keyword = "\\\\d+ of \\\\d+ DOCUMENTS$| Dokument \\\\d+ von \\\\d+$",
  end_keyword = "^LANGUAGE: |^SPRACHE: ",
  length_keyword = "^LENGTH: |^LÄNGE: ", verbose = TRUE)
}
\arguments{
\item{x}{Name or names of LexisNexis TXT file to be converted.}

\item{encoding}{Encoding to be assumed for input files. Defaults to UTF-8
(the LexisNexis standard value).}

\item{extract_paragraphs}{A logical flag indicating if the returned object
will include a third data frame with paragraphs.}

\item{convert_date}{A logical flag indicating whether to try to convert
the date of each article into Date format. Conversion fails for non-standard
dates provided by LexisNexis, so it might be safer to convert dates afterwards.}

\item{date_format}{If convert_date is set to TRUE, all dates are converted
using this single pattern. See \link[base]{strptime}.}

\item{start_keyword}{Is used to indicate the beginning of an article. All
articles need to have the same number of beginnings, ends and lengths (which
indicate the last line of meta-data).}

\item{end_keyword}{Is used to indicate the end of an article.}

\item{length_keyword}{Is used to indicate the end of the meta-data.}

\item{verbose}{A logical flag indicating whether information should be
printed to the screen.}
}
\value{
An LNToutput S4 object consisting of 3 data.frames for meta-data,
  articles and paragraphs.
}
\description{
Read a LexisNexis TXT file and convert it to an LNToutput S4 object made up of data frames.
}
\details{
The function can produce an LNToutput S4 object with two data.frames:
  meta, containing all meta information such as date, author and headline and
  articles, containing just the article ID and the text of the articles. When
  extract_paragraphs is set to TRUE, the output contains a third data.frame,
  similar to articles but with articles split into paragraphs.

  Note: All files need to have the same number of beginnings, ends and lengths
  (which indicate the last line of meta-data). Whether this is the case can be
  tested with \link{lnt_checkFiles}. In some cases it makes sense
  to change the keywords for these three important indicators e.g. to
  "^LANGUAGE: ENGLISH" to narrow down the search for the ends of an article.
}
\examples{
LNToutput <- lnt_read(lnt_sample())
meta.df <- LNToutput@meta
articles.df <- LNToutput@articles
paragraphs.df <- LNToutput@paragraphs
}
\author{
Johannes B. Gruber
}
\keyword{LexisNexis}
