% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/calc_longterm_daily_stats.R
\name{calc_longterm_daily_stats}
\alias{calc_longterm_daily_stats}
\title{Calculate long-term summary statistics from daily mean flows}
\usage{
calc_longterm_daily_stats(
  data,
  dates = Date,
  values = Value,
  groups = STATION_NUMBER,
  station_number,
  percentiles = c(10, 90),
  roll_days = 1,
  roll_align = "right",
  water_year_start = 1,
  start_year,
  end_year,
  exclude_years,
  months = 1:12,
  complete_years = FALSE,
  include_longterm = TRUE,
  custom_months,
  custom_months_label,
  transpose = FALSE,
  ignore_missing = FALSE
)
}
\arguments{
\item{data}{Data frame of daily data that contains columns of dates, flow values, and (optional) groups (e.g. station numbers).
Leave blank or set to \code{NULL} if using \code{station_number} argument.}

\item{dates}{Name of column in \code{data} that contains dates formatted YYYY-MM-DD. Only required if dates column name is not 
'Date' (default). Leave blank or set to \code{NULL} if using \code{station_number} argument.}

\item{values}{Name of column in \code{data} that contains numeric flow values, in units of cubic metres per second.
Only required if values column name is not 'Value' (default). Leave blank if using \code{station_number} argument.}

\item{groups}{Name of column in \code{data} that contains unique identifiers for different data sets, if applicable. Only required 
if groups column name is not 'STATION_NUMBER'. Function will automatically group by a column named 'STATION_NUMBER' if 
present. Remove the 'STATION_NUMBER' column beforehand to remove this grouping. Leave blank if using \code{station_number} 
argument.}

\item{station_number}{Character string vector of seven-digit Water Survey of Canada station numbers (e.g. \code{"08NM116"}) for
which to extract daily streamflow data from a HYDAT database. Requires \code{tidyhydat} package and a HYDAT database.
Leave blank if using \code{data} argument.}

\item{percentiles}{Numeric vector of percentiles to calculate. Set to \code{NA} if none required. Default \code{c(10,90)}.}

\item{roll_days}{Numeric value of the number of days to apply a rolling mean. Default \code{1}.}

\item{roll_align}{Character string identifying the direction of the rolling mean from the specified date, either by the first 
(\code{'left'}), last (\code{'right'}), or middle (\code{'center'}) day of the rolling n-day group of observations. 
Default \code{'right'}.}

\item{water_year_start}{Numeric value indicating the month (\code{1} through \code{12}) of the start of water year for
analysis. Default \code{1}.}

\item{start_year}{Numeric value of the first year to consider for analysis. Leave blank or set well before start date (e.g.
\code{1800}) to use from the first year of the source data.}

\item{end_year}{Numeric value of the last year to consider for analysis. Leave blank or set well after end date (e.g.
\code{2100}) to use up to the last year of the source data.}

\item{exclude_years}{Numeric vector of years to exclude from analysis. Leave blank or set to \code{NULL} to include all years.}

\item{months}{Numeric vector of months to include in analysis. For example, \code{3} for March, \code{6:8} for Jun-Aug or 
\code{c(10:12,1)} for first four months (Oct-Jan) when \code{water_year_start = 10} (Oct). Default summarizes all 
months (\code{1:12}).}

\item{complete_years}{Logical value indicating whether to include only years with complete data in analysis. Default \code{FALSE}.}

\item{include_longterm}{Logical value indicating whether to include long-term calculation of all data. Default \code{TRUE}.}

\item{custom_months}{Numeric vector of months to combine to summarize (e.g. \code{6:8} for Jun-Aug). Adds results to the end of table.
If wanting months that overlap calendar years (e.g. Oct-Mar), choose a \code{water_year_start} that begins before the first 
month listed. Leave blank for no custom month summary.}

\item{custom_months_label}{Character string to label custom months. For example, if \code{custom_months = 7:9} you may choose 
\code{"Summer"} or \code{"Jul-Sep"}. Default \code{"Custom-Months"}.}

\item{transpose}{Logical value indicating whether to transpose rows and columns of results. Default \code{FALSE}.}

\item{ignore_missing}{Logical value indicating whether dates with missing values should be included in the calculation. If
\code{TRUE} then a statistic will be calculated regardless of missing dates. If \code{FALSE} then only those statistics from
time periods with no missing dates will be returned. Default \code{FALSE}.}
}
\value{
A tibble data frame with the following columns:
  \item{Month}{month of the year, including 'Long-term' for all months, and 'Custom-Months' if selected}
  \item{Mean}{mean of all daily data for a given month and long-term over all years}
  \item{Median}{median of all daily data for a given month and long-term over all years}
  \item{Maximum}{maximum of all daily data for a given month and long-term over all years}
  \item{Minimum}{minimum of all daily data for a given month and long-term over all years}
  \item{P'n'}{each n-th percentile selected for a given month and long-term over all years}
  Default percentile columns:
  \item{P10}{10th percentile of all daily data for a given month and long-term over all years}
  \item{P90}{90th percentile of all daily data for a given month and long-term over all years}
  Transposing data creates a column of "Statistics" and subsequent columns for each month selected.
}
\description{
Calculates the long-term mean, median, maximum, minimum, and percentiles of daily flow values for each month and over 
   all data (Long-term) from a daily streamflow data set. Calculates statistics from all values, unless specified.
   Returns a tibble with statistics.
}
\examples{
# Run if HYDAT database has been downloaded (using tidyhydat::download_hydat())
if (file.exists(tidyhydat::hy_downloaded_db())) {

# Calculate long-term statistics using data argument with defaults
flow_data <- tidyhydat::hy_daily_flows(station_number = "08NM116")
calc_longterm_daily_stats(data = flow_data,
                          start_year = 1980)

# Calculate long-term statistics using station_number argument with defaults
calc_longterm_daily_stats(station_number = "08NM116",
                          start_year = 1980)

# Calculate long-term statistics regardless of whether there is missing data for a given year
calc_longterm_daily_stats(station_number = "08NM116",
                          ignore_missing = TRUE)
                  
# Calculate long-term statistics for water years starting in October
calc_longterm_daily_stats(station_number = "08NM116",
                          start_year = 1980,
                          water_year_start = 10)
                  
# Calculate long-term statistics with custom years and percentiles
calc_longterm_daily_stats(station_number = "08NM116",
                          start_year = 1981,
                          end_year = 2010,
                          exclude_years = c(1991,1993:1995),
                          percentiles = c(25,75))
                    
# Calculate long-term statistics and add custom stats for July-September
calc_longterm_daily_stats(station_number = "08NM116",
                          start_year = 1980,
                          custom_months = 7:9,
                          custom_months_label = "Summer")                  
                          
}
}
