% Generated by roxygen2 (4.1.0): do not edit by hand
% Please edit documentation in R/rm_non_ascii.R
\name{rm_non_ascii}
\alias{rm_non_ascii}
\title{Remove/Replace/Extract Non-ASCII}
\usage{
rm_non_ascii(text.var, trim = !extract, clean = TRUE,
  pattern = "@rm_non_ascii", replacement = "", extract = FALSE,
  dictionary = getOption("regex.library"), ascii.out = TRUE, ...)
}
\arguments{
\item{text.var}{The text variable.}

\item{trim}{logical.  If \code{TRUE} removes leading and trailing white
spaces.}

\item{clean}{logical.  If \code{TRUE} extra white spaces and escaped
characters will be removed.}

\item{pattern}{A character string containing a regular expression (or
character string for \code{fixed = TRUE}) to be matched in the given
character vector.  Default, \code{@rm_non_ascii} uses the
\code{rm_non_ascii} regex from the regular expression dictionary from
the \code{dictionary} argument.  If \code{extract = FALSE},
\code{\link[base]{iconv}} with the \code{sub} argument set is used to
conduct the substitution, rather than \code{\link[base]{gsub}} as in the
other \code{rm_XXX} functions.}

\item{replacement}{Replacement for matched \code{pattern}.}

\item{extract}{logical.  If \code{TRUE} all non-ASCII strings are extracted
into a list of vectors.}

\item{dictionary}{A dictionary of canned regular expressions to search within
if \code{pattern} begins with \code{"@rm_"}.}

\item{ascii.out}{logical.  If \code{TRUE} output is given in non-ASCII format,
otherwise \code{"byte"} is used.}

\item{\dots}{ignored.}
}
\value{
Returns a character string with non-ASCII characters removed (or a list of
the extracted non-ASCII strings if \code{extract = TRUE}).
}
\description{
Remove/replace/extract non-ASCII substring from a string.  This is the template used by
other \pkg{qdapRegex} \code{rm_XXX} functions.
}
\section{Warning}{
 \code{\link[base]{iconv}} is used within \code{rm_non_ascii}.
\code{\link[base]{iconv}}'s behavior across operating systems may not be
consistent.
}
\examples{
x <- c("Hello World", "Ekstr\\xf8m", "J\\xf6reskog", "bi\\xdfchen Z\\xfcrcher")
Encoding(x) <- "latin1"
x

rm_non_ascii(x)
rm_non_ascii(x, replacement="<<FLAG>>")
rm_non_ascii(x, extract=TRUE)
rm_non_ascii(x, extract=TRUE, ascii.out=FALSE)
}
\author{
\href{http://stackoverflow.com}{stackoverflow}'s MrFlick, hwnd, and Tyler Rinker <tyler.rinker@gmail.com>.
}
\references{
The non-ASCII regular expression was taken from:
\url{http://stackoverflow.com/a/25469131/1000343}
}
\seealso{
\code{\link[base]{gsub}},
\code{\link[stringi]{stri_extract_all_regex}}

Other rm_.functions: \code{\link{rm_abbreviation}};
  \code{\link{rm_angle}}, \code{\link{rm_bracket}},
  \code{\link{rm_bracket_multiple}},
  \code{\link{rm_curly}}, \code{\link{rm_round}},
  \code{\link{rm_square}}; \code{\link{rm_between}},
  \code{\link{rm_between_multiple}};
  \code{\link{rm_caps_phrase}}; \code{\link{rm_caps}};
  \code{\link{rm_citation_tex}}; \code{\link{rm_citation}};
  \code{\link{rm_city_state_zip}};
  \code{\link{rm_city_state}}; \code{\link{rm_date}};
  \code{\link{rm_default}}; \code{\link{rm_dollar}};
  \code{\link{rm_email}}; \code{\link{rm_emoticon}};
  \code{\link{rm_endmark}}; \code{\link{rm_hash}};
  \code{\link{rm_nchar_words}}; \code{\link{rm_number}};
  \code{\link{rm_percent}}; \code{\link{rm_phone}};
  \code{\link{rm_postal_code}};
  \code{\link{rm_repeated_characters}};
  \code{\link{rm_repeated_phrases}};
  \code{\link{rm_repeated_words}}; \code{\link{rm_tag}};
  \code{\link{rm_time}}; \code{\link{rm_title_name}};
  \code{\link{rm_twitter_url}}, \code{\link{rm_url}};
  \code{\link{rm_white}}, \code{\link{rm_white_bracket}},
  \code{\link{rm_white_colon}},
  \code{\link{rm_white_comma}},
  \code{\link{rm_white_endmark}},
  \code{\link{rm_white_lead}},
  \code{\link{rm_white_lead_trail}},
  \code{\link{rm_white_multiple}},
  \code{\link{rm_white_punctuation}},
  \code{\link{rm_white_trail}}; \code{\link{rm_zip}}
}
\keyword{ascii}
\keyword{unicode}

