## Apply the subset testing approach to adjust for multiple testing
## The first set of p-values comes from the test of association for the main genetic effect: Pg
## The second set of p-values comes from the test for GxE effects: Pge
#' Subset multiple hypothesis testing procedure combining the two steps of a
#' two-step gene-environment interaction test.
#'
#' Run \code{\link{SST}} to adjust for multiple testing while combining the two steps
#' of the GxE interaction testing procedure. Variants whose marginal genetic p-value
#' falls below \code{Pg_thr_step1} are carried into step 2, where their GxE p-values
#' are compared against \code{FWER_step2} divided by the number of variants selected
#' in step 1. The procedure is applicable for a multivariate phenotype, as well as a
#' univariate phenotype.
#' @param PVAL A data.frame with three columns.
#'  The first column (PVAL$SNP) provides the names of all SNPs or genetic variants tested.
#'  The second column (PVAL$G.P) contains the p-values of the variants obtained from
#'  testing an overall marginal genetic association between the multivariate phenotype
#'  and each genetic variant individually.
#'  The third column (PVAL$GE.P) contains the p-values obtained from testing the overall
#'  GxE effect on the multivariate phenotype in the presence of a possible marginal effect
#'  due to the genetic variant and a marginal effect due to the environmental variable.
#'  The number of rows in PVAL is the same as the number of genetic variants, and it has
#'  the same structure as the output of \code{\link{mv_G_GE}}. No default.
#' @param Pg_thr_step1 A positive real number between 0 and 1 providing the p-value
#'  threshold used to select the set of promising SNPs in step 1. These selected SNPs
#'  are tested for a GxE effect in the second step. Default is 0.005.
#' @param FWER_step2 A positive real number between 0 and 1 specifying the family-wise
#'  error rate to be maintained in the second step while identifying the genetic variants
#'  having a genome-wide significant overall GxE effect on the multivariate phenotype.
#'  Default is 0.05.
#' @return A character vector with the names of the SNPs identified to have a genome-wide
#'  significant overall GxE effect. It is empty when no SNP passes both steps.
#' @references A Majumdar, KS Burch, S Sankararaman, B Pasaniuc, WJ Gauderman, JS Witte (2020)
#' A two-step approach to testing overall effect of gene-environment interaction for multiple phenotypes.
#' bioRxiv, doi: https://doi.org/10.1101/2020.07.06.190256
#'
#' @seealso \code{\link{WHT}}, \code{\link{mv_G_GE}}
#'
#' @export
SST <- function(PVAL, Pg_thr_step1 = 0.005, FWER_step2 = 0.05){

    ## Validate the inputs; from here on the cutoffs are whatever the checker returned.
    chkPVAL(PVAL)
    FWER_step2 <- chk0_1cutoff(FWER_step2, "FWER_step2", 0.05)
    Pg_thr_step1 <- chk0_1cutoff(Pg_thr_step1, "Pg_thr_step1", 0.005)

    ## Pull the three columns out as plain vectors.
    p_marginal <- as.numeric(PVAL$G.P)
    p_interaction <- as.numeric(PVAL$GE.P)
    snp_names <- as.character(PVAL$SNP)

    ## Step 1: row positions of the SNPs whose marginal genetic p-value is below the
    ## screening threshold (the default choice of Pg_thr_step1 can be tuned).
    step1_rows <- which(p_marginal < Pg_thr_step1)

    ## Step 2: Bonferroni-type cutoff over the SNPs retained in step 1 only.
    step2_cutoff <- FWER_step2 / length(step1_rows)
    step2_pvals <- p_interaction[step1_rows]

    ## Map the step-2 hits back to row positions in PVAL; which() drops NA comparisons.
    significant_rows <- step1_rows[which(step2_pvals < step2_cutoff)]

    ## SNPs having a GxE effect identified by subset testing.
    snp_names[significant_rows]
}

