% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xGRviaGenomicAnno.r
\name{xGRviaGenomicAnno}
\alias{xGRviaGenomicAnno}
\title{Function to conduct region-based enrichment analysis using genomic annotations via binomial test}
\usage{
xGRviaGenomicAnno(data.file, annotation.file = NULL, background.file =
NULL,
format.file = c("data.frame", "bed", "chr:start-end", "GRanges"),
build.conversion = c(NA, "hg38.to.hg19", "hg18.to.hg19"),
resolution = c("bases", "regions", "hybrid"),
background.annotatable.only = T, p.tail = c("one-tail", "two-tails"),
p.adjust.method = c("BH", "BY", "bonferroni", "holm", "hochberg",
"hommel"),
GR.annotation = NA, verbose = T,
RData.location = "http://galahad.well.ox.ac.uk/bigdata")
}
\arguments{
\item{data.file}{an input data file, containing a list of genomic
regions to test. If the input file is formatted as a 'data.frame'
(specified by the parameter 'format.file' below), the first three
columns correspond to the chromosome (1st column), the starting
chromosome position (2nd column), and the ending chromosome position
(3rd column). If the format is indicated as 'bed' (browser extensible
data), the columns are the same as in the 'data.frame' format but the
position is a 0-based offset from the chromosome position. If the genomic
regions provided are not ranges but single positions only, the ending
chromosome position (3rd column) may be omitted. If the format is indicated
as "chr:start-end", instead of using the first 3 columns, only the
first column will be used and processed. If the file also contains
other columns, these additional columns will be ignored. Alternatively,
the input file can be the content itself assuming that input file has
been read. Note: the file should use the tab delimiter as the field
separator between columns.}

\item{annotation.file}{an input annotation file containing genomic
annotations for genomic regions. If the input file is formatted as a
'data.frame', the first four columns correspond to the chromosome (1st
column), the starting chromosome position (2nd column), the ending
chromosome position (3rd column), and the genomic annotations (eg
transcription factors and histones; 4th column). If the format is
indicated as 'bed', the columns are the same as in the 'data.frame' format
but the position is a 0-based offset from the chromosome position. If the
format is indicated as
"chr:start-end", the first two columns correspond to the
chromosome:start-end (1st column) and the genomic annotations (eg
transcription factors and histones; 2nd column). If the file also
contains other columns, these additional columns will be ignored.
Alternatively, the input file can be the content itself assuming that
input file has been read. Note: the file should use the tab delimiter
as the field separator between columns.}

\item{background.file}{an input background file containing a list of
genomic regions as the test background. The file format is the same as
'data.file'. By default, it is NULL meaning all annotatable bases (ie
non-redundant bases covered by 'annotation.file') are used as
background. However, if only one annotation (eg only a transcription
factor) is provided in 'annotation.file', the background must be
provided.}

\item{format.file}{the format for input files. It can be one of
"data.frame", "chr:start-end", "bed" and "GRanges"}

\item{build.conversion}{the conversion from one genome build to
another. The conversions supported are "hg38.to.hg19" and
"hg18.to.hg19". By default it is NA (no need to do so)}

\item{resolution}{the resolution of overlaps being tested. It can be
one of "bases" at the base resolution (by default), "regions" at the
region resolution, and "hybrid" at the base-region hybrid resolution
(that is, data at the region resolution but annotation/background at
the base resolution). If regions being analysed are SNPs themselves,
then the results are the same even when choosing this parameter as
either 'bases' or 'hybrid' or 'regions'}

\item{background.annotatable.only}{logical to indicate whether the
background is further restricted to annotatable bases (covered by
'annotation.file'). In other words, if the background is provided, the
background bases are those after being overlapped with annotatable
bases. Notably, if only one annotation (eg only a transcription factor)
is provided in 'annotation.file', it should be false}

\item{p.tail}{the tail used to calculate p-values. It can be either
"two-tails" for the significance based on two-tails (ie both over- and
under-representation) or "one-tail" (by default) for the
significance based on one tail (ie only over-representation)}

\item{p.adjust.method}{the method used to adjust p-values. It can be
one of "BH", "BY", "bonferroni", "holm", "hochberg" and "hommel". The
first two methods "BH" (widely used) and "BY" control the false
discovery rate (FDR: the expected proportion of false discoveries
amongst the rejected hypotheses); the last four methods "bonferroni",
"holm", "hochberg" and "hommel" are designed to give strong control of
the family-wise error rate (FWER). Notes: FDR is a less stringent
condition than FWER}

\item{GR.annotation}{the genomic regions of annotation data. By
default, it is 'NA' to disable this option. Pre-built genomic
annotation data are detailed in the section 'Note'. Alternatively, the
user can also directly provide a customised GR object (or a list of GR
objects)}

\item{verbose}{logical to indicate whether the messages will be
displayed in the screen. By default, it sets to true for display}

\item{RData.location}{the characters to tell the location of built-in
RData files. See \code{\link{xRDataLoader}} for details}
}
\value{
a data frame with the following columns (the explanations below are based
on results at the 'hybrid' resolution):
\itemize{
\item{\code{name}: the annotation name}
\item{\code{nAnno}: the number of bases covered by that annotation. If
the background is provided, they are also restricted by this}
\item{\code{nOverlap}: the number of regions overlapped between input
regions and annotation regions. If the background is provided, they are
also restricted by this}
\item{\code{fc}: fold change}
\item{\code{zscore}: z-score}
\item{\code{pvalue}: p-value}
\item{\code{adjp}: adjusted p-value. It is the p value but after being
adjusted for multiple comparisons}
\item{\code{or}: a vector containing odds ratio}
\item{\code{CIl}: a vector containing lower bound confidence interval
for the odds ratio}
\item{\code{CIu}: a vector containing upper bound confidence interval
for the odds ratio}
\item{\code{expProb}: the probability of expecting bases overlapped
between background regions and annotation regions}
\item{\code{obsProb}: the probability of observing regions overlapped
between input regions and annotation regions}
}
}
\description{
\code{xGRviaGenomicAnno} is supposed to conduct region-based enrichment
analysis for the input genomic region data (genome build hg19), using
genomic annotations (eg active chromatin, transcription factor binding
sites/motifs, conserved sites). Enrichment analysis is based on
binomial test for estimating the significance of overlaps either at the
base resolution, at the region resolution or at the hybrid resolution.
Test background can be provided; by default, the annotatable bases will
be used.
}
\note{
Pre-built genomic annotation data are detailed in
\code{\link{xDefineGenomicAnno}}.
}
\examples{
# Load the XGR package and specify the location of built-in data
library(XGR)
RData.location <- "http://galahad.well.ox.ac.uk/bigdata"

\dontrun{
# Enrichment analysis for GWAS SNPs from ImmunoBase
## a) provide input data
data.file <- "http://galahad.well.ox.ac.uk/bigdata/ImmunoBase_GWAS.bed"

## b) perform enrichment analysis using FANTOM expressed enhancers
### one-tail p-value calculation (by default)
eTerm <- xGRviaGenomicAnno(data.file, format.file="bed",
GR.annotation="FANTOM5_Enhancer_Cell", RData.location=RData.location)
### alternatively: two-tails p-value calculation (useful to identify depletions)
eTerm_2 <- xGRviaGenomicAnno(data.file, format.file="bed",
GR.annotation="FANTOM5_Enhancer_Cell", p.tail="two-tails",
RData.location=RData.location)

## c) view enrichment results for the top significant terms
xEnrichViewer(eTerm)

## d) barplot of enriched terms
bp <- xEnrichBarplot(eTerm, top_num='auto', displayBy="fc")
bp

## e) forest plot of enriched terms
gp <- xEnrichForest(eTerm)
gp

## f) save enrichment results to the file called 'Regions_enrichments.txt'
output <- xEnrichViewer(eTerm, top_num=length(eTerm$adjp),
sortBy="adjp", details=TRUE)
utils::write.table(output, file="Regions_enrichments.txt", sep="\\t",
row.names=FALSE)

##########################################
### Advanced use: customised GR.annotation
##########################################
FANTOM5_CAT_Cell <- xRDataLoader('FANTOM5_CAT_Cell',
RData.location=RData.location)
ls_gr_lncRNA <- lapply(FANTOM5_CAT_Cell, function(x)
x[grep('lncRNA',x$Category)])
ls_gr_mRNA <- lapply(FANTOM5_CAT_Cell, function(x)
x[grep('coding_mRNA',x$Category)])
GR.annotations <- c("ls_gr_lncRNA","ls_gr_mRNA","FANTOM5_CAT_Cell")
ls_df <- lapply(1:length(GR.annotations), function(i){
GR.annotation <- get(GR.annotations[i])
df <- xGRviaGenomicAnno(data.file=data.file, format.file="bed",
GR.annotation=GR.annotation, RData.location=RData.location)
df$group <- GR.annotations[i]
return(df)
})
df <- do.call(rbind, ls_df)
gp <- xEnrichHeatmap(df, fdr.cutoff=0.05, displayBy="zscore")

##########################################
### Advanced use: customised EpigenomeAtlas_15Segments
##########################################
info <- xRDataLoader('EpigenomeAtlas_15Segments_info',
RData.location=RData.location)
GR.annotations <- paste0('EpigenomeAtlas_15Segments_',names(info))
names(GR.annotations) <- info
ls_df <- lapply(1:length(GR.annotations), function(i){
GR.annotation <- GR.annotations[i]
message(sprintf("Analysing '\%s' (\%s) ...", names(GR.annotation),
as.character(Sys.time())), appendLF=T)
df <- xGRviaGenomicAnno(data.file=data.file, format.file="bed",
GR.annotation=GR.annotation, RData.location=RData.location, verbose=F)
df$group <- names(GR.annotation)
return(df)
})
df <- do.call(rbind, ls_df)
gp <- xEnrichHeatmap(df, fdr.cutoff=0.05, displayBy="fdr",
reorder="both")

}
}
\seealso{
\code{\link{xDefineGenomicAnno}}
}
