% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/api_define_extract.R
\name{define_extract_nhgis}
\alias{define_extract_nhgis}
\title{Define an IPUMS NHGIS extract request}
\usage{
define_extract_nhgis(
  description = "",
  datasets = NULL,
  time_series_tables = NULL,
  shapefiles = NULL,
  geographic_extents = NULL,
  breakdown_and_data_type_layout = NULL,
  tst_layout = NULL,
  data_format = NULL
)
}
\arguments{
\item{description}{Description of the extract.}

\item{datasets}{List of dataset specifications for any
\href{https://www.nhgis.org/overview-nhgis-datasets}{datasets}
to include in the extract request. Use \code{\link[=ds_spec]{ds_spec()}} to create a
\code{ds_spec} object containing a dataset specification. See examples.}

\item{time_series_tables}{List of time series table specifications for any
\href{https://www.nhgis.org/time-series-tables}{time series tables}
to include in the extract request. Use \code{\link[=tst_spec]{tst_spec()}} to create a
\code{tst_spec} object containing a time series table specification. See
examples.}

\item{shapefiles}{Names of any \href{https://www.nhgis.org/gis-files}{shapefiles}
to include in the extract request.}

\item{geographic_extents}{Vector of geographic extents to use for
all of the \code{datasets} and \code{time_series_tables} in the extract
definition (for instance, to obtain data within a specified state).
By default, selects all available extents.

Use \code{\link[=get_metadata_nhgis]{get_metadata_nhgis()}} to identify the available extents for a given
dataset or time series table, if any.}

\item{breakdown_and_data_type_layout}{The desired layout
of any \code{datasets} that have multiple data types or breakdown values.
\itemize{
\item \code{"single_file"} (default) keeps all data types and breakdown values in
one file
\item \code{"separate_files"} splits each data type or breakdown value into its
own file
}

Required if any \code{datasets} included in the extract definition consist of
multiple data types (for instance, estimates and margins of error) or have
multiple breakdown values specified. See \code{\link[=get_metadata_nhgis]{get_metadata_nhgis()}} to
determine whether a requested dataset has multiple data types.}

\item{tst_layout}{The desired layout of all \code{time_series_tables} included in
the extract definition.
\itemize{
\item \code{"time_by_column_layout"} (wide format, default): rows correspond to
geographic units, columns correspond to different times in the time
series
\item \code{"time_by_row_layout"} (long format): rows correspond to a single
geographic unit at a single point in time
\item \code{"time_by_file_layout"}: data for different times are provided in
separate files
}

Required when an extract definition includes any \code{time_series_tables}.}

\item{data_format}{The desired format of the extract data file.
\itemize{
\item \code{"csv_no_header"} (default) includes only a minimal header in the first
row
\item \code{"csv_header"} includes a second, more descriptive header row
\item \code{"fixed_width"} provides data in a fixed-width format
}

Note that by default, \code{\link[=read_nhgis]{read_nhgis()}} removes the additional header row in
\code{"csv_header"} files.

Required when an extract definition includes any \code{datasets} or
\code{time_series_tables}.}
}
\value{
An object of class \code{\link[=ipums_extract-class]{nhgis_extract}} containing
the extract definition.
}
\description{
Define the parameters of an IPUMS NHGIS extract request to be submitted via
the IPUMS API.

Use \code{\link[=get_metadata_nhgis]{get_metadata_nhgis()}} to browse and identify data sources for use
in NHGIS extract definitions. For general information, see the NHGIS
\href{https://www.nhgis.org/data-availability}{data source overview} and the
\href{https://www.nhgis.org/frequently-asked-questions-faq}{FAQ}.

Learn more about the IPUMS API in \code{vignette("ipums-api")} and
NHGIS extract definitions in \code{vignette("ipums-api-nhgis")}.
}
\details{
An NHGIS extract definition must include at least one dataset, time series
table, or shapefile specification.

Create an NHGIS dataset specification with \code{\link[=ds_spec]{ds_spec()}}. Each dataset
must be associated with a selection of \code{data_tables} and \code{geog_levels}. Some
datasets also support the selection of \code{years} and \code{breakdown_values}.

Create an NHGIS time series table specification with \code{\link[=tst_spec]{tst_spec()}}. Each time
series table must be associated with a selection of \code{geog_levels} and
may optionally be associated with a selection of \code{years}.

See examples or \code{vignette("ipums-api-nhgis")} for more details about
specifying datasets and time series tables in an NHGIS extract definition.
}
\examples{
# Extract definition for tables from an NHGIS dataset
# Use `ds_spec()` to create an NHGIS dataset specification
nhgis_extract <- define_extract_nhgis(
  description = "Example NHGIS extract",
  datasets = ds_spec(
    "1990_STF3",
    data_tables = "NP57",
    geog_levels = c("county", "tract")
  )
)

nhgis_extract

# Use `tst_spec()` to create an NHGIS time series table specification
define_extract_nhgis(
  description = "Example NHGIS extract",
  time_series_tables = tst_spec("CL8", geog_levels = "county"),
  tst_layout = "time_by_row_layout"
)

# To request multiple datasets, provide a list of `ds_spec` objects
define_extract_nhgis(
  description = "Extract definition with multiple datasets",
  datasets = list(
    ds_spec("2014_2018_ACS5a", "B01001", c("state", "county")),
    ds_spec("2015_2019_ACS5a", "B01001", c("state", "county"))
  )
)

# If you need to specify the same table or geographic level for
# many datasets, you may want to build the list of dataset specifications
# before defining your extract request:
dataset_names <- c("2014_2018_ACS5a", "2015_2019_ACS5a")

dataset_spec <- purrr::map(
  dataset_names,
  ~ ds_spec(
    .x,
    data_tables = "B01001",
    geog_levels = c("state", "county")
  )
)

define_extract_nhgis(
  description = "Extract definition with multiple datasets",
  datasets = dataset_spec
)

# You can request datasets, time series tables, and shapefiles in the same
# definition:
define_extract_nhgis(
  description = "Extract with datasets and time series tables",
  datasets = ds_spec("1990_STF1", c("NP1", "NP2"), "county"),
  time_series_tables = tst_spec("CL6", "state"),
  shapefiles = "us_county_1990_tl2008"
)

# Geographic extents are applied to all datasets/time series tables in the
# definition
define_extract_nhgis(
  description = "Extent selection",
  datasets = list(
    ds_spec("2018_2022_ACS5a", "B01001", "blck_grp"),
    ds_spec("2017_2021_ACS5a", "B01001", "blck_grp")
  ),
  geographic_extents = c("010", "050")
)

# Extract specifications can be indexed by name
names(nhgis_extract$datasets)

nhgis_extract$datasets[["1990_STF3"]]

\dontrun{
# Use the extract definition to submit an extract request to the API
submit_extract(nhgis_extract)
}
}
\seealso{
\code{\link[=get_metadata_nhgis]{get_metadata_nhgis()}} to find data to include in an extract definition.

\code{\link[=submit_extract]{submit_extract()}} to submit an extract request for processing.

\code{\link[=save_extract_as_json]{save_extract_as_json()}} and \code{\link[=define_extract_from_json]{define_extract_from_json()}} to share an
extract definition.
}
