% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/write-parquet.R
\name{write_parquet}
\alias{write_parquet}
\title{Write a data frame to a Parquet file}
\usage{
write_parquet(
  x,
  file,
  schema = NULL,
  compression = c("snappy", "gzip", "zstd", "uncompressed"),
  encoding = NULL,
  metadata = NULL,
  row_groups = NULL,
  options = parquet_options()
)
}
\arguments{
\item{x}{Data frame to write.}

\item{file}{Path to the output file. If this is the string \code{":raw:"},
then the data frame is written to a memory buffer, and the memory
buffer is returned as a raw vector.}

\item{schema}{Parquet schema. Specify a schema to tweak the default
nanoparquet R -> Parquet type mappings. Use \code{\link[=parquet_schema]{parquet_schema()}} to
create a schema that you can use here, or \code{\link[=read_parquet_schema]{read_parquet_schema()}} to
use the schema of a Parquet file.}

\item{compression}{Compression algorithm to use. Currently \code{"snappy"}
(the default), \code{"gzip"}, \code{"zstd"}, and \code{"uncompressed"} are supported.}

\item{encoding}{Encoding to use. Possible values:
\itemize{
\item If \code{NULL}, the appropriate encoding is selected automatically:
\code{RLE} or \code{PLAIN} for \code{BOOLEAN} columns, \code{RLE_DICTIONARY} for other
columns with many repeated values, and \code{PLAIN} otherwise.
\item If it is a single (unnamed) character string, then it'll be used
for all columns.
\item If it is an unnamed character vector of encoding names of the same
length as the number of columns in the data frame, then those
encodings will be used for each column.
\item If it is a named character vector, then the names must be unique
and each name must match a column name, to specify the encoding of
that column. The special empty name (\code{""}) applies to the rest of
the columns. If there is no empty name, the rest of the columns
will use the default encoding.
}

If \code{NA_character_} is specified for a column, the default encoding is
used for the column.

If a specified encoding is invalid for a certain column type,
or nanoparquet does not implement it, \code{write_parquet()} throws an
error.

This version of nanoparquet supports the following encodings:
\code{PLAIN}, \code{GROUP_VAR_INT}, \code{PLAIN_DICTIONARY}, \code{RLE}, \code{BIT_PACKED}, \code{DELTA_BINARY_PACKED}, \code{DELTA_LENGTH_BYTE_ARRAY}, \code{DELTA_BYTE_ARRAY}, \code{RLE_DICTIONARY}, \code{BYTE_STREAM_SPLIT}.

See \link{parquet-encodings} for more about encodings.}

\item{metadata}{Additional key-value metadata to add to the file.
This must be a named character vector, or a data frame with
character columns called \code{key} and \code{value}.}

\item{row_groups}{Row groups of the Parquet file. If \code{NULL}, then the
\code{num_rows_per_row_group} option is used from the \code{options} argument,
see \code{\link[=parquet_options]{parquet_options()}}. Otherwise it must be an integer vector,
specifying the starts of the row groups.}

\item{options}{Nanoparquet options, see \code{\link[=parquet_options]{parquet_options()}}.}
}
\value{
\code{NULL}, unless \code{file} is \code{":raw:"}, in which case the Parquet
file is returned as a raw vector.
}
\description{
Writes the contents of an R data frame into a Parquet file.
}
\details{
\code{write_parquet()} converts string columns to UTF-8 encoding by calling
\code{\link[base:Encoding]{base::enc2utf8()}}. It does the same for factor levels.
}
\examples{
\dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
# add row names as a column, because `write_parquet()` ignores them.
mtcars2 <- cbind(name = rownames(mtcars), mtcars)
write_parquet(mtcars2, "mtcars.parquet")
\dontshow{\}) # examplesIf}
}
\seealso{
\code{\link[=read_parquet_metadata]{read_parquet_metadata()}}, \code{\link[=read_parquet]{read_parquet()}}.
}
