% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/pns.R
\name{pns_data}
\alias{pns_data}
\title{Download PNS microdata}
\usage{
pns_data(
  year = NULL,
  vars = NULL,
  cache_dir = NULL,
  refresh = FALSE,
  lazy = FALSE,
  backend = c("arrow", "duckdb")
)
}
\arguments{
\item{year}{Numeric vector. Year(s) to download (2013 and/or 2019).
Use NULL to download all available years. Default is NULL.}

\item{vars}{Character vector. Variables to select. Use NULL for all variables.
Default is NULL.}

\item{cache_dir}{Character. Directory for caching downloaded files.
Default uses \code{tools::R_user_dir("healthbR", "cache")}.}

\item{refresh}{Logical. If TRUE, re-download even if file exists in cache.
Default is FALSE.}

\item{lazy}{Logical. If TRUE, returns a lazy query object instead of a
tibble. Requires the \pkg{arrow} package. The lazy object supports
dplyr verbs (filter, select, mutate, etc.) which are pushed down
to the query engine before collecting into memory. Call
\code{dplyr::collect()} to materialize the result. Default: FALSE.}

\item{backend}{Character. Backend for lazy evaluation: \code{"arrow"}
(default) or \code{"duckdb"}. Only used when \code{lazy = TRUE}.
DuckDB backend requires the \pkg{duckdb} package.}
}
\value{
A tibble with PNS microdata, or a lazy query object when \code{lazy = TRUE}.
}
\description{
Downloads and returns PNS microdata for specified years from the IBGE FTP server.
Data is cached locally to avoid repeated downloads. When the \pkg{arrow} package
is installed, data is cached in Parquet format for faster subsequent reads.
}
\details{
The PNS (Pesquisa Nacional de Saude, the Brazilian National Health Survey) is a
household survey conducted by IBGE in partnership with the Ministry of Health.
It provides comprehensive data on health conditions, lifestyle, and healthcare
access of the Brazilian population.
\subsection{Survey design variables}{

For proper statistical analysis with complex survey design, use the following
weight and design variables with the \pkg{srvyr} or \pkg{survey} packages:
\itemize{
\item \code{V0028}: household weight
\item \code{V0029}: selected person weight
\item \code{V0030}: person weight with non-response adjustment
\item \code{UPA_PNS}: primary sampling unit
\item \code{V0024}: stratum
}
}
}
\section{Data source}{

Data is downloaded from the IBGE FTP server:
\verb{https://ftp.ibge.gov.br/PNS/}
}

\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
# download PNS 2019 data
df <- pns_data(year = 2019, cache_dir = tempdir())

# download all years
df_all <- pns_data(cache_dir = tempdir())

# select specific variables
df_subset <- pns_data(
  year = 2019,
  vars = c("V0001", "C006", "C008", "V0028"),
  cache_dir = tempdir()
)
\dontshow{\}) # examplesIf}
}
