% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/S4classes.R, R/regions.R
\docType{class}
\name{regions}
\alias{regions}
\alias{regions-class}
\alias{regions,corpus-method}
\alias{regions,subcorpus-method}
\alias{as.regions}
\alias{as.data.table.regions}
\title{Regions of a CWB corpus.}
\usage{
regions(x, s_attribute)

\S4method{regions}{corpus}(x, s_attribute)

\S4method{regions}{subcorpus}(x, s_attribute)

as.regions(x, ...)

\method{as.data.table}{regions}(x, keep.rownames, values = NULL, ...)
}
\arguments{
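\item{x}{The object to process: a \code{corpus} or \code{subcorpus} object for
\code{regions()}, an object to be coerced for \code{as.regions()}, and an
object of class \code{regions} for \code{as.data.table()}.}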

\item{s_attribute}{An s-attribute denoted by a length-one \code{character} vector
for which regions shall be derived.}

\item{...}{Further arguments.}

\item{keep.rownames}{Required argument to safeguard consistency with S3
method definition in the \code{data.table} package. Unused in this context.}

\item{values}{Values to assign to a column that will be added.}
}
\description{
Class to store and process the regions of a corpus. Regions are defined by
start and end corpus positions and correspond to a set of tokens surrounded
by start and end XML tags.
}
\details{
The \code{regions} class is a minimal representation of regions. It does not
include information on the "strucs" (region IDs) that are used internally to
obtain values of s-attributes, nor does it record which combination of
conditions on s-attributes has been used to obtain the regions. This is left
to the \code{subcorpus} class. Whereas the \code{subcorpus} class is associated
with the assumption that a set of regions is a meaningful sub-unit of a corpus,
the \code{regions} class focuses on the individual sequences of tokens defined
by a structural attribute (such as paragraphs, sentences, named entities).

Information on regions is maintained in the \code{cpos} slot of the \code{regions} S4
class: A two-column \code{matrix} with begin and end corpus positions (first and
second column, respectively). All other slots are inherited from the \code{corpus}
class.

The understanding of "regions" is modelled on the usage of terms by CWB
developers. As it is put in the
\href{https://cwb.sourceforge.io/files/CQP_Manual.pdf}{CQP Interface and
Query Language Manual}: "Matching pairs of XML start and end tags are encoded
as token regions, identified by the corpus positions of the first token
(immediately following the start tag) and the last token (immediately
preceding the end tag) of the region." (p. 6)

The \code{as.regions} method coerces objects to a \code{regions} object.

The \code{as.data.table} method returns the matrix with corpus
positions in the slot \code{cpos} as a \code{data.table}.
}
\section{Slots}{

\describe{
\item{\code{cpos}}{A two-column \code{matrix} with start and end corpus positions (first
and second column, respectively).}
}}

\examples{
use("polmineR")
P <- partition("GERMAPARLMINI", date = "2009-11-12", speaker = "Jens Spahn")
R <- as.regions(P)

# Get regions matrix as data.table, without / with values
sc <- corpus("REUTERS") \%>\% subset(grep("saudi-arabia", places))
regions_dt <- as.data.table(sc)
regions_dt <- as.data.table(
  sc,
  values = s_attributes(sc, "id", unique = FALSE)
)
}
\seealso{
Other classes to manage corpora: 
\code{\link{corpus-class}},
\code{\link{phrases}},
\code{\link{ranges-class}},
\code{\link{subcorpus}}
}
\concept{classes to manage corpora}
