% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/api_metadata.R
\name{get_metadata_nhgis}
\alias{get_metadata_nhgis}
\title{List available data sources from IPUMS NHGIS}
\usage{
get_metadata_nhgis(
  type = NULL,
  dataset = NULL,
  data_table = NULL,
  time_series_table = NULL,
  delay = 0,
  api_key = Sys.getenv("IPUMS_API_KEY")
)
}
\arguments{
\item{type}{One of \code{"datasets"}, \code{"data_tables"}, \code{"time_series_tables"},
or \code{"shapefiles"} indicating the type of summary metadata to retrieve.
Leave \code{NULL} if requesting metadata for a single \code{dataset}, \code{data_table},
or \code{time_series_table}.}

\item{dataset}{Name of an individual dataset for which to retrieve metadata.}

\item{data_table}{Name of an individual data table for which to retrieve
metadata. If provided, an associated \code{dataset} must also be specified.}

\item{time_series_table}{Name of an individual time series table for which
to retrieve metadata.}

\item{delay}{Number of seconds to delay between
successive API requests, if multiple requests are needed to retrieve all
records.

A delay is highly unlikely to be necessary and is intended only as a
fallback in the event that you cannot retrieve all metadata records without
exceeding the API rate limit.

Only used if \code{type} is provided.}

\item{api_key}{API key associated with your user account. Defaults to the
value of the \code{IPUMS_API_KEY} environment variable. See
\code{\link[=set_ipums_api_key]{set_ipums_api_key()}}.}
}
\value{
If \code{type} is provided, a \code{\link[tibble:tbl_df-class]{tibble}} of
summary metadata for all data sources of the provided \code{type}.
Otherwise, a named list of metadata for the specified \code{dataset},
\code{data_table}, or \code{time_series_table}.
}
\description{
Retrieve information about available NHGIS data sources, including
\href{https://www.nhgis.org/overview-nhgis-datasets}{datasets},
data tables (summary tables),
\href{https://www.nhgis.org/time-series-tables}{time series tables},
and \href{https://www.nhgis.org/gis-files}{shapefiles} (GIS files).

To retrieve summary metadata for all available data sources of a particular
type, use the \code{type} argument. To retrieve detailed metadata for a
single data source, use the \code{dataset}, \code{data_table}, or \code{time_series_table}
argument. See the \emph{metadata availability} section below for information on
the metadata provided for each data type.

For general information, see the NHGIS
\href{https://www.nhgis.org/data-availability}{data source overview} and the
\href{https://www.nhgis.org/frequently-asked-questions-faq}{FAQ}.

Learn more about the IPUMS API in \code{vignette("ipums-api")} and about
NHGIS extract definitions in \code{vignette("ipums-api-nhgis")}.
}
\section{Metadata availability}{

The following sections summarize the metadata fields provided for each data
type. Summary metadata include a subset of the fields provided for individual
data sources.
\subsection{Datasets:}{
\itemize{
\item \strong{\code{name}:} The unique identifier for the dataset. This is the value that
is used to refer to the dataset when interacting with the IPUMS API.
\item \strong{\code{group}:} The group of datasets to which the dataset belongs.
For instance, 5 separate datasets are part of the
\code{"2015 American Community Survey"} group.
\item \strong{\code{description}:} A short description of the dataset.
\item \strong{\code{sequence}:} Order in which the dataset will appear in the metadata API
and extracts.
\item \strong{\code{has_multiple_data_types}:} Logical value indicating whether multiple
data types exist for this dataset. For example, ACS
datasets include both estimates and margins of error.
\item \strong{\code{data_tables}:} A \code{\link[tibble:tbl_df-class]{tibble}} containing names,
codes, and descriptions for all data tables available for the dataset.
\item \strong{\code{geog_levels}:} A \code{\link[tibble:tbl_df-class]{tibble}} containing names,
descriptions, and extent information for the geographic levels available
for the dataset. The \code{has_geog_extent_selection} field contains logical
values indicating whether extent selection is allowed (and required) for
the associated geographic level. See \code{geographic_instances} below.
\item \strong{\code{breakdowns}:} A \code{\link[tibble:tbl_df-class]{tibble}} containing names,
types, descriptions, and breakdown values for all breakdowns available
for the dataset.
\item \strong{\code{years}:} A vector of years for which the dataset is available. This
field is only present if a dataset is available for multiple years. Note
that ACS datasets are not considered to be available for multiple years.
\item \strong{\code{geographic_instances}:} A \code{\link[tibble:tbl_df-class]{tibble}} containing
names and descriptions for all valid geographic extents for the
dataset. This field is only present if at least one of the dataset's
\code{geog_levels} allows geographic extent selection.
}
}

\subsection{Data tables:}{
\itemize{
\item \strong{\code{name}:} The unique identifier for the data table within its dataset.
This is the value that is used to refer to the data table when interacting
with the IPUMS API.
\item \strong{\code{description}:} A short description of the data table.
\item \strong{\code{universe}:} The statistical population measured by this data table
(e.g., persons, families, or occupied housing units).
\item \strong{\code{nhgis_code}:} The code identifying the data table in the extract.
Variables in the extract data will include column names prefixed with this
code.
\item \strong{\code{sequence}:} Order in which the data table will appear in the metadata
API and extracts.
\item \strong{\code{dataset_name}:} Name of the dataset to which this data table belongs.
\item \strong{\code{n_variables}:} Number of variables included in this data table.
\item \strong{\code{variables}:} A \code{\link[tibble:tbl_df-class]{tibble}} containing variable
descriptions and codes for the variables included in the data table.
}
}

\subsection{Time series tables:}{
\itemize{
\item \strong{\code{name}:} The unique identifier for the time series table. This is the
value that is used to refer to the time series table when interacting with
the IPUMS API.
\item \strong{\code{description}:} A short description of the time series table.
\item \strong{\code{geographic_integration}:} The method by which the time series table
aligns geographic units across time. \code{"Nominal"} integration indicates
that geographic units are aligned by name (disregarding changes in unit
boundaries). \code{"Standardized"} integration indicates that data from multiple
time points are standardized to the indicated year's census units. For
more information, see the NHGIS documentation on
\href{https://www.nhgis.org/time-series-tables#geographic-integration}{geographic integration}.
\item \strong{\code{sequence}:} Order in which the time series table will appear in the
metadata API and extracts.
\item \strong{\code{time_series}:} A \code{\link[tibble:tbl_df-class]{tibble}} containing names
and descriptions for the individual time series available for the
time series table.
\item \strong{\code{years}:} A \code{\link[tibble:tbl_df-class]{tibble}} containing
information on the available data years for the
time series table.
\item \strong{\code{geog_levels}:} A \code{\link[tibble:tbl_df-class]{tibble}} containing names
and descriptions for the geographic levels available
for the time series table.
\item \strong{\code{geographic_instances}:} A \code{\link[tibble:tbl_df-class]{tibble}} containing
names and descriptions for all valid geographic extents for the
time series table. Includes all states or state equivalents that are
valid for \emph{any} year in the time series table. (Some instances may be valid
for some but not all years.)
}
}

\subsection{Shapefiles:}{
\itemize{
\item \strong{\code{name}:} The unique identifier for the shapefile. This is the
value that is used to refer to the shapefile when interacting with
the IPUMS API.
\item \strong{\code{year}:} The survey year in which the shapefile's represented areas
were used for tabulations, which may be different from the vintage of the
represented areas. For more information, see the NHGIS documentation on
\href{https://www.nhgis.org/gis-files#years}{GIS file years}.
\item \strong{\code{geographic_level}:} The geographic level of the shapefile.
\item \strong{\code{extent}:} The geographic extent covered by the shapefile.
\item \strong{\code{basis}:} The derivation source of the shapefile.
\item \strong{\code{sequence}:} Order in which the shapefile will appear in the
metadata API and extracts.
}
}
}

\examples{
\dontrun{
library(dplyr)

# Get summary metadata for all available sources of a given data type
get_metadata_nhgis("datasets")

# Filter to identify data sources of interest by their metadata values
all_tsts <- get_metadata_nhgis("time_series_tables")

tsts <- all_tsts \%>\%
  filter(
    grepl("Children", description),
    grepl("Families", description),
    geographic_integration == "Standardized to 2010"
  )

tsts$name

# Get detailed metadata for a single source with its associated argument:
cs5_meta <- get_metadata_nhgis(time_series_table = "CS5")
cs5_meta$geog_levels

# Use the available values when defining an NHGIS extract request
define_extract_nhgis(
  time_series_tables = tst_spec("CS5", geog_levels = "state")
)

# Detailed metadata is also provided for datasets and data tables
get_metadata_nhgis(dataset = "1990_STF1")
get_metadata_nhgis(data_table = "NP1", dataset = "1990_STF1")

# Iterate over data sources to retrieve detailed metadata for several
# records. For instance, to get variable metadata for a set of data tables:
tables <- c("NP1", "NP2", "NP10")

var_meta <- purrr::map(
  tables,
  function(dt) {
    dt_meta <- get_metadata_nhgis(dataset = "1990_STF1", data_table = dt)

    # This ensures you avoid hitting the rate limit for large numbers of tables
    Sys.sleep(1)

    dt_meta$variables
  }
)
}
}
\seealso{
\code{\link[=define_extract_nhgis]{define_extract_nhgis()}} to create an IPUMS NHGIS extract definition.
}
