% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ncbi_searcher.R
\name{ncbi_searcher}
\alias{ncbi_searcher}
\title{Search for gene sequences available for taxa from NCBI.}
\usage{
ncbi_searcher(taxa = NULL, id = NULL, seqrange = "1:3000",
  getrelated = FALSE, fuzzy = FALSE, limit = 500,
  entrez_query = NULL, hypothetical = FALSE, verbose = TRUE)
}
\arguments{
\item{taxa}{(character) Scientific name to search for.}

\item{id}{(\code{character}) Taxonomic id to search for. Not compatible with
argument \code{taxa}.}

\item{seqrange}{(character) Sequence range, e.g., \code{"1:1000"}. This is the range of
sequence lengths to search for, so \code{"1:1000"} means search for sequences from 1 to 1000
characters in length.}

\item{getrelated}{(logical) If \code{TRUE}, gets the longest sequences of a species
in the same genus as the one searched for. If \code{FALSE}, returns nothing when no
match is found.}

\item{fuzzy}{(logical) Whether to do fuzzy taxonomic ID search or exact
search. If \code{TRUE}, we use \code{xXarbitraryXx[porgn:__txid<ID>]},
but if \code{FALSE}, we use \code{txid<ID>}. Default: \code{FALSE}}

\item{limit}{(\code{numeric}) Number of sequences to search for and return.
Max of 10,000. If you search for 6000 records, and only 5000 are found,
you will of course only get 5000 back.}

\item{entrez_query}{(\code{character}; length 1) An Entrez-format query to
filter results with. This is useful to search for sequences with specific
characteristics. The format is the same as the one used to search GenBank
(\url{https://www.ncbi.nlm.nih.gov/books/NBK3837/#EntrezHelp.Entrez_Searching_Options}).}

\item{hypothetical}{(\code{logical}; length 1) If \code{FALSE}, an attempt
will be made to not return hypothetical or predicted sequences judging from
accession number prefixes (XM and XR). This can result in fewer than the
\code{limit} being returned even if there are more sequences available,
since this filtering is done after searching NCBI.}

\item{verbose}{(logical) If \code{TRUE} (default), informative messages are printed.}
}
\value{
\code{data.frame} of results if a single input is given. A list of
 \code{data.frame}s if multiple inputs are given.
}
\description{
Search for gene sequences available for taxa from NCBI.
}
\examples{
\dontrun{
# A single species
out <- ncbi_searcher(taxa="Umbra limi", seqrange = "1:2000")
# Get the same species information using a taxonomy id
out <- ncbi_searcher(id = "75935", seqrange = "1:2000")
# If the taxon name is unique, using the taxon name and id are equivalent
all(ncbi_searcher(id = "75935") ==  ncbi_searcher(taxa="Umbra limi"))
# If the taxon name is not unique, use taxon id
#  "266948" is the uid for the butterfly genus, but there is also a genus
#  of orchids with the same name
nrow(ncbi_searcher(id = "266948")) ==  nrow(ncbi_searcher(taxa="Satyrium"))
# get list of genes available, removing non-unique
unique(out$gene_desc)
# does the string 'RAG1' exist in any of the gene names
out[grep("RAG1", out$gene_desc, ignore.case=TRUE),]

# A single species without records in NCBI
out <- ncbi_searcher(taxa="Sequoia wellingtonia", seqrange="1:2000",
  getrelated=TRUE)

# Many species, can run in parallel or not using plyr
species <- c("Salvelinus alpinus","Ictalurus nebulosus","Carassius auratus")
out2 <- ncbi_searcher(taxa=species, seqrange = "1:2000")
lapply(out2, head)
library("plyr")
out2df <- ldply(out2) # make data.frame of all
unique(out2df$gene_desc) # get list of genes available, removing non-unique
out2df[grep("12S", out2df$gene_desc, ignore.case=TRUE), ]

# Using the getrelated and entrez_query options
ncbi_searcher(taxa = "Olpidiopsidales", limit = 5, getrelated = TRUE,
            entrez_query = "18S[title] AND 28S[title]")

# get refseqs
one <- ncbi_searcher(taxa = "Salmonella enterica",
  entrez_query="srcdb_refseq[PROP]")
two <- ncbi_searcher(taxa = "Salmonella enterica")
}
}
\seealso{
\code{\link{ncbi_byid}}, \code{\link{ncbi_byname}}
}
\author{
Scott Chamberlain \email{myrmecocystus@gmail.com}, Zachary Foster
  \email{zacharyfoster1989@gmail.com}
}
