% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tab.R, R/tab_classes.R
\name{tab_many}
\alias{tab_many}
\alias{tab_get_vars}
\alias{is_tab}
\alias{set_color_style}
\alias{get_color_style}
\alias{set_color_breaks}
\alias{get_color_breaks}
\title{Many cross-tables as one, with color helpers}
\usage{
tab_many(
  data,
  row_vars,
  col_vars,
  tab_vars,
  wt,
  pct = "no",
  color = "no",
  OR = "no",
  chi2 = FALSE,
  na = "keep",
  levels = "all",
  na_drop_all,
  cleannames = NULL,
  other_if_less_than = 0,
  other_level = "Others",
  ref = "auto",
  ref2 = "first",
  comp = "tab",
  ci = "no",
  conf_level = 0.95,
  method_cell = "wilson",
  method_diff = "ac",
  totaltab = "line",
  totaltab_name = "Ensemble",
  totrow = TRUE,
  totcol = "last",
  total_names = "Total",
  add_n = TRUE,
  add_pct = FALSE,
  digits = 0,
  subtext = "",
  filter
)

tab_get_vars(tabs, vars = c("row_var", "col_vars", "tab_vars"))

is_tab(x)

set_color_style(
  type = c("text", "bg"),
  theme = NULL,
  html_24_bit = c("blue_red", "green_red", "no"),
  custom_palette = NULL
)

get_color_style(
  mode = c("crayon", "color_code"),
  type = NULL,
  theme = NULL,
  html_24_bit = NULL
)

set_color_breaks(pct_breaks, mean_breaks, contrib_breaks)

get_color_breaks(brk, type = c("positive", "all"))
}
\arguments{
\item{data}{A data frame.}

\item{row_vars}{The row variable, which will be printed with one level per line.
If numeric, it will be converted to factor. If more than one row_var is provided,
a different table is made for each of them.}

\item{col_vars}{<\link[tidyr:tidyr_tidy_select]{tidy-select}>
One column is printed for each level of each column variable.
For numeric variables means are calculated, in a single column.
To pass many variables you may use syntax \code{col_vars = c(col_var1, col_var2, ...)}.}

\item{tab_vars}{<\link[tidyr:tidyr_tidy_select]{tidy-select}>
One subtable is made for each combination of levels of the tab variables.
To pass many variables you may use syntax \code{tab_vars = c(tab_var1, tab_var2, ...)}.
All tab variables are converted to factor. Leave empty to make a simple table.}

\item{wt}{A weight variable, of class numeric. Leave empty for unweighted results.}

\item{pct}{The type of percentages to calculate :
\itemize{
\item \code{"row"}: row percentages.
\item \code{"col"}: column percentages.
\item \code{"all"}: frequencies for each subtable/group, if there is \code{tab_vars}.
\item \code{"all_tabs"}: frequencies for the whole (set of) table(s).
}
The argument is vectorised over both \code{row_vars} and \code{col_vars}. You can
then write, for example:
\code{pct = list(row_var1 = list("row", "col", "col"), row_var2 = list("col", "row", "row"))}}

\item{color}{The type of colors to print, as a single string. Vectorised over \code{row_vars}.
\itemize{
\item \code{"no"}: by default, no colors are printed.
\item \code{"diff"}: color percentages and means based on cells differences from
totals (or from first cells when \code{ref = "first"}).
\item \code{"diff_ci"}: color pct and means based on cells differences from totals
or first cells, removing coloring when the confidence interval of this difference
is higher than the difference itself.
\item \code{"after_ci"}: idem, but cut off the confidence interval from the
difference first.
\item \code{"contrib"}: color cells based on their contribution to variance
(except mean columns, from numeric variables).
\item \code{"OR"}: for \code{pct == "col"} or \code{pct == "row"},
color based on odds ratios (or relative risks ratios)
\item \code{"auto"}: frequencies (\code{pct = "all"}, \code{pct = "all_tabs"})
and counts are colored with \code{"contrib"}.
When \code{ci = "diff"}, row and col percentages are colored with "after_ci" ;
otherwise they are colored with "diff".
}}

\item{OR}{With \code{pct = "row"} or \code{pct = "col"}, calculate and print odds ratios
(for binary variables) or relative risks ratios (for variables with 3 levels
or more).
\itemize{
\item \code{"no"}: by default, no OR are calculated.
\item \code{"OR"}: print OR (instead of percentages).
\item \code{"OR_pct"}: print OR, with percentages in bracket.
}}

\item{chi2}{Set to \code{TRUE} to calculate Chi2 summaries with \code{\link{tab_chi2}}.
Useful to print metadata, and to color cells based on their contribution to variance
(\code{color = "contrib"}). Vectorised over \code{row_vars}.}

\item{na}{The policy to adopt with missing values. It must be a single string.
\itemize{
\item \code{na = "keep"}: by default, prints \code{NA}'s as explicit \code{"NA"} level.
\item \code{na = "drop"}: removes \code{NA} levels before making each table
(tabs made with different column variables may have a different number of
observations, and won't exactly have the same total columns).
\item \code{"drop_all"}: remove \code{NA}'s for all variables before making the tables.
}}

\item{levels}{The levels of \code{col_vars} to keep (for more complex selections
use \code{\link[dplyr:select]{dplyr::select}}). The argument is vectorised over \code{col_vars}.
\itemize{
\item \code{"all"}: by default, all levels are kept.
\item \code{"first"}: only keep the first level of each \code{col_vars}
\item \code{"auto"}: keep the first level when \code{col_var} has only two levels,
keep all levels otherwise
}}

\item{na_drop_all}{<\link[tidyr:tidyr_tidy_select]{tidy-select}>
Removes all observations with a \code{NA} in any of the chosen variables, for all tables
(tabs for each column variable will have the same number of observations).}

\item{cleannames}{Set to \code{TRUE} to clean levels names, by removing
prefix numbers like "1-", and text in parenthesis. All data formatting arguments are
passed to \code{\link{tab_prepare}}.}

\item{other_if_less_than}{When set to a positive integer, levels with less count
than it will be merged into an "Others" level.}

\item{other_level}{The name of the "Other" level, as a single string.}

\item{ref}{The reference cell to calculate differences and ratios
(used to print \code{colors}) :
\itemize{
\item \code{"auto"}: by default, cell difference from the corresponding total
(rows or cols depending on \code{pct = "row"} or \code{pct = "col"}) is
used for \code{diff} ; cell ratio from the first line (or col) is used for \code{OR}
(odds ratio/relative risks ratio).
\item \code{"tot"}: totals are always used.
\item \code{"first"}: calculate cell difference or ratio from the first cell
of the row or column (useful to color temporal developments).
\item \code{n}: when \code{ref} is an integer, the nth row (or column) is used for comparison.
\item \code{"regex"}: when \code{ref} is a string, it is used as a regular expression,
to match with the names of the rows (or columns). Be precise enough to match only one
column or row, otherwise you get a warning message.
\item \code{"no"}: do not use a reference and do not calculate differences, to save calculation time.
}}

\item{ref2}{A second reference cell is needed to calculate odds ratios
(or relative risks ratios). The first cell of the row or column is used by default.
See \code{ref} above for the full list of possible values.}

\item{comp}{The comparison level : by subtables/groups, or for the whole table.
Vectorised over \code{row_vars}.
\itemize{
\item \code{"tab"}: by default, contributions to variance,
row differences from totals/first cells, and row confidence intervals for these
differences, are calculated for each \code{tab_vars} group.
\item \code{"all"}: compare cells to the general total line (provided there is
a total table with a total row), or with the reference line of the total table
when \code{ref = "first"}, an integer or a regular expression.
}}

\item{ci}{The type of confidence intervals to calculate, passed to \code{\link{tab_ci}}.
Vectorised over \code{row_vars}.
\itemize{
\item \code{"cell"}: absolute confidence intervals of cells percentages.
\item \code{"diff"}: confidence intervals of the difference between a cell and the
relative total cell (or relative first cell when \code{ref = "first"}).
\item \code{"auto"}: \code{ci = "diff"} for means and row/col percentages,
\code{ci = "cell"} for frequencies ("all", "all_tabs").
}
By default, for percentages, with \code{ci = "cell"} Wilson's method is used,
and with \code{ci = "diff"} Wald's method along with Agresti and Caffo's adjustment.
Means use classic method. This can be changed with \code{method_cell}
and \code{method_diff}. By default, with \code{ci = "cell"}, the result is printed
in the \verb{[inf;sup]} form. Set \code{options("tabxplor.ci_print" = "moe")} to print
\code{pct +- moe} instead.}

\item{conf_level}{The confidence level, as a single numeric between 0 and 1.
Default to 0.95 (95\%).}

\item{method_cell}{Character string specifying which method to use with percentages
for \code{ci = "cell"}. This can be one out of:
"wald", "wilson", "wilsoncc", "agresti-coull", "jeffreys", "modified wilson",
"modified jeffreys", "clopper-pearson", "arcsine", "logit", "witting", "pratt",
"midp", "lik" and "blaker". Defaults to "wilson".
See \code{\link[DescTools:BinomCI]{BinomCI}}.}

\item{method_diff}{Character string specifying which method to use with percentages
for \code{ci = "diff"}. This can be one out of: "wald", "waldcc", "ac", "score",
"scorecc", "mn", "mee", "blj", "ha", "hal", "jp". Defaults to "ac", Wald interval with
the adjustment according to Agresti, Caffo for difference in proportions and
independent samples. See \code{\link[DescTools:BinomDiffCI]{BinomDiffCI}}.}

\item{totaltab}{The total table, if there are subtables/groups
(i.e. when \code{tab_vars} is provided). Vectorised over \code{row_vars}.
\itemize{
\item \code{"line"}: by default, add a general total line (necessary for
calculations with \code{comp = "all"})
\item \code{"table"}: add a complete total table
(i.e. \code{row_var} by \code{col_vars} without \code{tab_vars}).
\item \code{"no"}: not to draw any total table.
}}

\item{totaltab_name}{The name of the total table, as a single string.}

\item{totrow}{By default, total rows are printed.
Set to \code{FALSE} to remove them (after calculations if needed).
Vectorised over \code{row_vars}.}

\item{totcol}{The policy with total columns. Vectorised over \code{col_vars}.
\itemize{
\item \code{"last"}: by default, only prints a total column for the last
column variable (of class factor, not numeric).
\item \code{"each"}: print a total column for each column variable.
\item \code{"no"}: remove all total columns (after calculations if needed).
}}

\item{total_names}{The names of the totals, as a character vector of length one or two.
Use syntax of type \code{c("Total row", "Total column")} to set different names for
rows and cols.}

\item{add_n}{For \code{pct = "row"} or \code{pct = "col"}, set to \code{FALSE} not to add another
column or row with unweighted counts (\code{n}).}

\item{add_pct}{Set to \code{TRUE} to add a column with the frequencies of the row
variable (for \code{pct = "row"}) or a row with the frequencies of the column variable
(for  \code{pct = "col"}).}

\item{digits}{The number of digits to print, as a single integer, or an integer vector
the same length as \code{col_vars}. The argument is vectorised over \code{col_vars}.}

\item{subtext}{A character vector to print rows of legend under the table.}

\item{filter}{A \code{\link[dplyr:filter]{dplyr::filter}} to apply to the data frame
first, as a single string (which will be converted to code, i.e. to a call).
Useful when printing multiples tabs with \code{\link[tibble:tribble]{tibble::tribble}},
to use different filters for similar tables or simply make the field of observation
more visible into the code.}

\item{tabs}{A \code{tibble} of class \code{tab}, made with \code{\link{tab}},
\code{\link{tab_many}} or \code{\link{tab_plain}}.}

\item{vars}{In \code{tab_get_vars}, a character vector containing the wanted vars names:
\code{"row_var"}, \code{"col_vars"} or \code{"tab_vars"}.}

\item{x}{An object to test with \code{\link{is_tab}}.}

\item{type}{Default to \code{"positive"}, which just prints breaks for positive spreads.
Set to \code{all} to get breaks for negative spreads as well.}

\item{theme}{For \code{set_color_style} and \code{get_color_style}, is your console
or html table background \code{"light"} or \code{"dark"} ? Default to RStudio theme.}

\item{html_24_bit}{Use 24-bit color palettes for html tables : set to \code{"green_red"}
or \code{"blue_red"}. Only with \code{mode = "color_code"} (not \code{mode = "crayon"}) and
\code{theme = "light"}. Default to \code{getOption("tabxplor.color_html_24_bit")}.}

\item{custom_palette}{Possibility to provide custom color styles, as a character
vector of 10 html color codes (the five first for over-represented numbers,
the five last for under-represented ones). The result is saved to
\code{options("tabxplor.color_style")}. To discard, relaunch the function with
\code{custom_palette = NULL}.}

\item{mode}{By default, \code{get_color_style} returns a list of \pkg{crayon} coloring
functions. Set to \code{"color_code"} to return html color codes.}

\item{pct_breaks}{If they are to be changed, the breaks used for percentages.
Default to \code{c(0.05, 0.1, 0.2, 2, 0.3)} : first color used when the pct of a cell
is +5\% superior to the pct of the related total ; second color used when
it is +10\% superior ; third +20\% superior ; fourth *2 superior ;
fifth +30\% superior. When > 1, it does not take differences but ratio.
The opposite for cells inferior to the total (without the *2 rule).
With \code{color = "after_ci"}, the first break is subtracted from all breaks
(default becomes \code{c(0, 0.05, 0.15, 2, 0.25)} : +0\%, +5\%, +15\%, *2, +25\%).}

\item{mean_breaks}{If they are to be changed, the breaks used for means.
Default to \code{c(1.15, 1.5, 2, 4)} : first color used when the mean of a cell
is superior to 1.15 times the mean of the related total row ; second color
used when it is superior to 1.5 times ; etc.
The opposite for cells inferior to the total.
With \code{color = "after_ci"}, all breaks are divided by the first break
(default becomes \code{c(1, 1.3, 1.7, 3.5)}).}

\item{contrib_breaks}{If they are to be changed, the breaks used for contributions to
variance. Default to \code{c(1, 2, 5, 10)} : first color used when the contribution of
a cell is superior to the mean contribution ; second color used when it is superior to
2 times the mean contribution ; etc. The global color (for example green or
red/orange) is given by the sign of the spread.}

\item{brk}{When missing, return all color breaks. Specify to return a given color
break, among \code{"pct"}, \code{"mean"}, \code{"contrib"}, \code{"pct_ci"} and
\code{"mean_ci"}.}
}
\value{
A \code{tibble} of class \code{tab}, possibly with colored reading helpers.
When there are two \code{row_vars} or more, a list of \code{tibble} of class \code{tab}.
All non-text columns are of class \code{\link{fmt}}, storing all
the data necessary to print formats and colors. Columns with \code{row_var} and
\code{tab_vars} are of class \code{factor} : every added \code{factor} will be
considered as a \code{tab_vars} and used for grouping. To add text columns without
using them in calculations, be sure they are of class \code{character}.

A list with the variables names.

A single logical.

Set global options \code{"tabxplor.color_style_type"} and
\code{"tabxplor.color_style_theme"}, used when printing \code{\link{tab}} objects.

A vector of crayon color functions, or a vector of color html codes.

Set the global option "tabxplor.color_breaks" as a list of different double
vectors, and also returns it invisibly.

The color breaks as a double vector, or list of double vectors.
}
\description{
A full-featured function to create, manipulate and format many cross-tables
as one, using colors to make the printed tab more easily readable (in R terminal or
exported to Excel with \code{\link{tab_xl}}).
Since objects of class \code{tab} are also of class \code{tibble}, you can then use all
\pkg{dplyr} verbs to modify the result, like \code{\link[dplyr:select]{select}},
\code{\link[dplyr:arrange]{arrange}}, \code{\link[dplyr:filter]{filter}}
or \code{\link[dplyr:mutate]{mutate}}.

Only breaks for attractions/over-representations (in green) should be
given, as a vector of positive doubles, with length between 1 and 5.
Breaks for aversions/under-representations (in orange/red) will simply be the opposite.
}
\section{Functions}{
\itemize{
\item \code{tab_get_vars()}: Get the variables names of a \pkg{tabxplor} \code{tab}

\item \code{is_tab()}: a test function for class tabxplor_tab

\item \code{set_color_style()}: define the color style used to print \code{\link{tab}}.

\item \code{get_color_style()}: get color styles as \pkg{crayon} functions or html codes.

\item \code{set_color_breaks()}: set the breaks used to print colors

\item \code{get_color_breaks()}: get the breaks currently used to print colors

}}
\examples{
# Make a summary table with many col_vars, showing only one specific level :
\donttest{
library(dplyr)
first_lvs <- c("Married", "$25000 or more", "Strong republican", "Protestant")
data <- forcats::gss_cat \%>\% mutate(across(
  where(is.factor),
  ~ forcats::fct_relevel(., first_lvs[first_lvs \%in\% levels(.)])
))
tab_many(data, race, c(marital, rincome, partyid, relig, age, tvhours),
         levels = "first", pct = "row", chi2 = TRUE, color = "auto")
}

# Can be used with pmap and tribble to program several tables with different parameters
#  all at once, in a readable way:
\donttest{
library(purrr)
library(tibble)
pmap(
  tribble(
    ~row_var, ~col_vars       , ~pct , ~filter              , ~subtext               ,
    "race"  , "marital"       , "row", NULL                 , "Source: GSS 2000-2014",
    "relig" , c("race", "age"), "row", "year \%in\% 2000:2010", "Source: GSS 2000-2010",
    NA_character_, "race"     , "no" , NULL                 , "Source: GSS 2000-2014",
  ),
  .f = tab_many,
  data = forcats::gss_cat, color = "auto", chi2 = TRUE)
}
set_color_style(type = "bg")
set_color_breaks(
  pct_breaks = c(0.05, 0.15, 0.3),
  mean_breaks = c(1.15, 2, 4),
  contrib_breaks = c(1, 2, 5)
)
}
