#' Estimating the Average Treatment Effect (ATE) in an internal target population using multi-source data
#'
#' @description
#' Doubly-robust and efficient estimator for the ATE in each internal target population using multi-source data.
#'
#' @param X Data frame (or matrix) containing the covariate data in the multi-source data. It should have \eqn{n} rows and \eqn{p} columns. Character variables will be converted to factors.
#' @param Y Vector of length \eqn{n} containing the outcome.
#' @param S Vector of length \eqn{n} containing the source indicator. If \code{S} is a factor, it will maintain its level order; otherwise it will be converted to a factor with the default level order. The order will be carried over to the outputs and plots.
#' @param A Vector of length \eqn{n} containing the binary treatment (1 for treated and 0 for untreated).
#' @param cross_fitting Logical specifying whether sample splitting and cross fitting should be used.
#' @param replications Integer specifying the number of sample splitting and cross fitting replications to perform, if \code{cross_fitting = TRUE}. The default is \code{10L}.
#' @param source_model Character string specifying the (penalized) multinomial logistic regression for estimating the source model. It has two options: "\code{MN.glmnet}" (default) and "\code{MN.nnet}", which use \pkg{glmnet} and \pkg{nnet} respectively.
#' @param source_model_args List specifying the arguments for the source model (in \pkg{glmnet} or \pkg{nnet}).
#' @param treatment_model_type Character string specifying how the treatment model is estimated. Options include "\code{separate}" (default) and "\code{joint}". If "\code{separate}", the treatment model (i.e., \eqn{P(A=1|X, S=s)}) is estimated by regressing \eqn{A} on \eqn{X} within each specific internal population \eqn{S=s}. If "\code{joint}", the treatment model is estimated by regressing \eqn{A} on \eqn{X} and \eqn{S} using the multi-source population.
#' @param treatment_model_args List specifying the arguments for the treatment model (in \pkg{SuperLearner}).
#' @param outcome_model_args List specifying the arguments for the outcome model  (in \pkg{SuperLearner}).
#' @param show_progress Logical specifying whether to print a progress bar for the cross-fit replicates completed, if \code{cross_fitting = TRUE}.
#'
#' @details
#' \strong{Data structure:}
#'
#' The multi-source dataset consists of the outcome \code{Y}, source \code{S}, treatment \code{A}, and covariates \code{X} (\eqn{n \times p}) in the internal populations. The data sources can be trials, observational studies, or a combination of both.
#'
#' \strong{Estimation of nuisance parameters:}
#'
#' The following models are fit:
#' \itemize{
#' \item Propensity score model: \eqn{\eta_a(X)=P(A=a|X)}. We perform the decomposition \eqn{P(A=a|X)=\sum_{s} P(A=a|X, S=s)P(S=s|X)} and estimate \eqn{P(A=1|X, S=s)} (i.e., the treatment model) and \eqn{q_s(X)=P(S=s|X)} (i.e., the source model).
#' \item Outcome model: \eqn{\mu_a(X)=E(Y|X, A=a)}
#' }
#' The models are estimated by \pkg{SuperLearner} with the exception of the source model which is estimated by \pkg{glmnet} or \pkg{nnet}.
#'
#'
#' \strong{ATE estimation:}
#'
#' The ATE estimator is
#' \deqn{
#'  \dfrac{\widehat \kappa}{n}\sum\limits_{i=1}^{n} \Bigg[ I(S_i = s) \widehat \mu_a(X_i)
#'  +I(A_i = a) \dfrac{\widehat q_{s}(X_i)}{\widehat \eta_a(X_i)}  \Big\{ Y_i - \widehat \mu_a(X_i) \Big\} \Bigg],
#' }
#' where \eqn{\widehat \kappa=\{n^{-1} \sum_{i=1}^n I(S_i=s)\}^{-1}}. The estimator is doubly robust and non-parametrically efficient.
#'
#' To achieve non-parametric efficiency and asymptotic normality, it requires that \eqn{||\widehat \mu_a(X) -\mu_a(X)||\big\{||\widehat \eta_a(X) -\eta_a(X)||+||\widehat q_s(X) -q_s(X)||\big\}=o_p(n^{-1/2})}.
#' In addition, sample splitting and cross-fitting can be performed to avoid the Donsker class assumption.
#'
#' When a data source is a randomized trial, it is still recommended to estimate the propensity score for optimal efficiency.
#'
#' @return An object of class "ATE_internal". This object is a list with the following elements:
#'   \item{df_dif}{A data frame containing the treatment effect (mean difference) estimates for the internal populations.}
#'   \item{df_A0}{A data frame containing the potential outcome mean estimates under A = 0 for the internal populations.}
#'   \item{df_A1}{A data frame containing the potential outcome mean estimates under A = 1 for the internal populations.}
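#'   \item{fit_outcome}{Fitted outcome model (\code{NULL} if \code{cross_fitting = TRUE}).}
#'   \item{fit_source}{Fitted source model (\code{NULL} if \code{cross_fitting = TRUE}).}
#'   \item{fit_treatment}{Fitted treatment model(s) (\code{NULL} if \code{cross_fitting = TRUE}).}
#'   \item{outcome_model_args}{List of arguments passed to the outcome model, including the \code{Y} and \code{X} used in the most recent fit.}
#'   \item{source_model}{Character string giving the source model that was used.}
#'   \item{treatment_model_args}{List of arguments passed to the treatment model, including the \code{Y} and \code{X} used in the most recent fit.}
#'   \item{source_names}{Character vector containing the levels of \code{S}.}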
#'
#' @references Robertson, S.E., Steingrimsson, J.A., Joyce, N.R., Stuart, E.A. and Dahabreh, I.J. (2021). \emph{Center-specific causal inference with multicenter trials: Reinterpreting trial evidence in the context of each participating center}. arXiv preprint arXiv:2104.05905.
#' @references Wang, G., McGrath, S., Lian, Y. and Dahabreh, I. (2024) \emph{CausalMetaR: An R package for performing causally interpretable meta-analyses}, arXiv preprint arXiv:2402.04341.
#'
#' @examples
#' \donttest{
#' ai <- ATE_internal(
#'   X = dat_multisource[, 1:10],
#'   Y = dat_multisource$Y,
#'   S = dat_multisource$S,
#'   A = dat_multisource$A,
#'   source_model = "MN.glmnet",
#'   source_model_args = list(),
#'   treatment_model_type = "separate",
#'   treatment_model_args = list(
#'     family = binomial(),
#'     SL.library = c("SL.glmnet", "SL.nnet", "SL.glm"),
#'     cvControl = list(V = 5L)
#'   ),
#'   outcome_model_args = list(
#'     family = gaussian(),
#'     SL.library = c("SL.glmnet", "SL.nnet", "SL.glm"),
#'     cvControl = list(V = 5L)
#'   )
#' )
#' }
#'
#' @export

ATE_internal <- function(
    X, # predictor matrix
    Y, # outcome
    S, # source
    A, # treatment
    cross_fitting = FALSE,
    replications = 10L,
    source_model = "MN.glmnet",
    source_model_args = list(),
    treatment_model_type = "separate",
    treatment_model_args = list(),
    outcome_model_args = list(),
    show_progress = TRUE
) {
  # For future possibilities
  treatment_model <- outcome_model <- "SuperLearner"

  # Total sample size
  n <- nrow(X)

  # Checking lengths of all data inputs
  if (length(Y) != n || length(S) != n || length(A) != n) {
    stop(paste0("All data inputs for the sources must have the same length:\n",
                "X has length ", n, ".\n",
                "Y has length ", length(Y), ".\n",
                "S has length ", length(S), ".\n",
                "A has length ", length(A), "."))
  }

  # Checking treatment data
  if (!is.numeric(A)) {
    stop("A must be a numeric vector")
  }
  if (!all(A %in% c(0, 1))) {
    stop("A must only take values 0 or 1")
  }

  # Checking model options up front, so that an unsupported option is reported
  # before any (potentially slow) model fitting is started
  if (!(source_model %in% c("MN.glmnet", "MN.nnet"))) {
    stop("Currently only support `MN.glmnet` and `MN.nnet`.")
  }
  if (!(treatment_model_type %in% c("separate", "joint"))) {
    stop("The 'treatment_model_type' has to be either 'separate' or 'joint'.")
  }

  # Number of sources
  if (!is.factor(S)) S <- factor(S)
  unique_S <- levels(S)
  no_S <- length(unique_S)
  # S with one-hot encoding to prevent potential problems in SuperLearner
  # (`drop = FALSE` keeps a one-column matrix when there are only two sources)
  S_dummy <- data.frame(model.matrix(~ 0 + S)[, -1, drop = FALSE])
  colnames(S_dummy) <- unique_S[-1]

  # Converting character variables into factor variables.
  # `lapply()` works column by column and keeps each column's type, whereas
  # `sapply()` would iterate over the cells of a matrix and would collapse
  # mixed factor/numeric columns into a numeric matrix of factor codes.
  X <- as.data.frame(X)
  X[] <- lapply(X, function(xx) {
    if (is.character(xx)) factor(xx) else xx
  })
  # Converting factor variables into dummy variables (intercept column removed)
  X <- data.frame(model.matrix(~ ., data = X)[, -1, drop = FALSE])
  # model.matrix() drops incomplete rows under the default `na.action`, which
  # would misalign X with Y, S and A
  if (nrow(X) != n) {
    stop("X must not contain missing values")
  }

  # Only set the default for `trace` when the user has not supplied one
  # (the check has to be made on the argument names, not on the values)
  if (source_model == "MN.nnet" && !("trace" %in% names(source_model_args))) {
    source_model_args$trace <- FALSE
  }

  # Doubly-robust estimates of the potential outcome means (column 1: A = 1,
  # column 2: A = 0) and of their variances for every source, evaluated on the
  # observations (Y_e, A_e, S_e) using the supplied nuisance predictions.
  estimate_phi <- function(Y_e, A_e, S_e, PrS_X, PrA_XS, pred_Y1, pred_Y0) {
    n_e <- length(Y_e)

    # P(A = a | X) = sum_s P(A = a | X, S = s) P(S = s | X)
    eta1 <- rowSums(PrA_XS * PrS_X)
    eta0 <- rowSums((1 - PrA_XS) * PrS_X)

    I_xa1 <- which(A_e == 1)
    I_xa0 <- which(A_e == 0)

    phi <- phi_var <- matrix(nrow = no_S, ncol = 2)
    rownames(phi) <- rownames(phi_var) <- unique_S
    for (s in unique_S) {
      tmp1 <- tmp2 <- matrix(0, nrow = n_e, ncol = 2)
      I_xs <- which(S_e == s)
      # Inverse of the sample proportion of source s
      kappa <- 1 / (length(I_xs) / n_e)

      # Outcome-model term, non-zero only for observations from source s
      tmp1[I_xs, 1] <- pred_Y1[I_xs]
      tmp1[I_xs, 2] <- pred_Y0[I_xs]

      # Weighted-residual term, non-zero only for observations with A = a
      qs <- PrS_X[, s]
      tmp2[I_xa1, 1] <- qs[I_xa1] / eta1[I_xa1] * (Y_e[I_xa1] - pred_Y1[I_xa1])
      tmp2[I_xa0, 2] <- qs[I_xa0] / eta0[I_xa0] * (Y_e[I_xa0] - pred_Y0[I_xa0])

      phi[s, ] <- kappa * colMeans(tmp1 + tmp2)

      # Centre the outcome-model term at the estimate before squaring
      tmp1[I_xs, 1] <- pred_Y1[I_xs] - phi[s, 1]
      tmp1[I_xs, 2] <- pred_Y0[I_xs] - phi[s, 2]

      phi_var[s, ] <- kappa^2 / n_e^2 * colSums((tmp1 + tmp2)^2)
    }

    list(phi = phi, phi_var = phi_var)
  }

  if (cross_fitting) {
    if (show_progress) {
      pb <- progress::progress_bar$new(total = replications,
                                       clear = FALSE,
                                       format = 'Cross-fitting progress [:bar] :percent, Elapsed time :elapsed, Est. time remaining :eta')
    }
    ## sample splitting and cross fitting loop
    K <- 4L
    phi_array_cf <- phi_se_array_cf <- array(dim = c(no_S, 3, K, replications),
                                             dimnames = list(unique_S,
                                                             c("A = 1", "A = 0", "Difference"),
                                                             NULL,
                                                             NULL))
    for (r in seq_len(replications)) {
      ### assign k in 0, 1, 2, 3 to each individual (stratified by source)
      id_by_S <- partition <- vector(mode = "list", length = no_S)
      names(id_by_S) <- unique_S
      names(partition) <- unique_S
      for (s in unique_S) {
        ids <- which(S == s)
        ns <- length(ids)
        partition[[s]] <- sample(rep(seq(K) - 1, length.out = ns))
        id_by_S[[s]] <- ids
      }
      for (k in 1:K) {
        # In fold k the four parts rotate through the roles: evaluation,
        # source model, treatment model and outcome model
        test_id <- sm_id <- tm_id <- om_id <- integer()
        for (s in unique_S) {
          id_S <- id_by_S[[s]]
          part_S <- partition[[s]]
          test_id <- c(test_id, id_S[part_S == k %% K])
          sm_id <- c(sm_id, id_S[part_S == (k + 1) %% K])
          tm_id <- c(tm_id, id_S[part_S == (k + 2) %% K])
          om_id <- c(om_id, id_S[part_S == (k + 3) %% K])
        }
        # `drop = FALSE` keeps data frames when there is a single column
        X_test <- X[test_id, , drop = FALSE]
        Y_test <- Y[test_id]
        A_test <- A[test_id]
        S_test <- S[test_id]

        X_sm <- X[sm_id, , drop = FALSE]
        S_sm <- S[sm_id]

        X_tm <- X[tm_id, , drop = FALSE]
        A_tm <- A[tm_id]
        S_tm <- S[tm_id]
        if (treatment_model_type == "joint") {
          # Keeping the data frame (and hence the source column names) so that
          # training and prediction data have identical column names
          S_dummy_tm <- S_dummy[tm_id, , drop = FALSE]
        }

        X_om <- X[om_id, , drop = FALSE]
        Y_om <- Y[om_id]
        A_om <- A[om_id]

        ## source model
        source_model_args$Y <- S_sm
        source_model_args$X <- X_sm
        source_model_args$newX <- X_test
        fit_source <- do.call(what = source_model, args = source_model_args)
        PrS_X <- fit_source$pred

        ## treatment model
        PrA_XS <- matrix(nrow = length(test_id), ncol = no_S)
        colnames(PrA_XS) <- unique_S
        if (treatment_model_type == "separate") {
          fit_treatment <- vector(mode = "list", length = no_S)
          names(fit_treatment) <- unique_S
          for (s in unique_S) {
            id_s <- which(S_tm == s)
            treatment_model_args$Y <- A_tm[id_s]
            treatment_model_args$X <- X_tm[id_s, , drop = FALSE]
            fit_treatment_s <- do.call(what = treatment_model,
                                       args = treatment_model_args)
            PrA_XS[, s] <- predict.SuperLearner(fit_treatment_s, newdata = X_test)$pred
            fit_treatment[[s]] <- fit_treatment_s
          }
        } else {
          treatment_model_args$Y <- A_tm
          treatment_model_args$X <- data.frame(S_dummy_tm, X_tm)
          fit_treatment <- do.call(what = treatment_model,
                                   args = treatment_model_args)
          for (s in unique_S) {
            # Predict as if every evaluation observation came from source s
            S_mat <- matrix(0, nrow = length(test_id), ncol = no_S - 1)
            colnames(S_mat) <- unique_S[-1]
            if (s %in% unique_S[-1]) S_mat[, s] <- 1
            PrA_XS[, s] <- predict.SuperLearner(fit_treatment,
                                                newdata = data.frame(S_mat, X_test))$pred
          }
        }

        ## outcome model
        outcome_model_args$Y <- Y_om
        outcome_model_args$X <- data.frame(A_om, X_om)
        fit_outcome <- do.call(what = outcome_model, args = outcome_model_args)
        pred_Y1 <- predict.SuperLearner(fit_outcome,
                                        newdata = data.frame(A_om = 1, X_test))$pred
        pred_Y0 <- predict.SuperLearner(fit_outcome,
                                        newdata = data.frame(A_om = 0, X_test))$pred

        ## estimators
        est <- estimate_phi(Y_e = Y_test, A_e = A_test, S_e = S_test,
                            PrS_X = PrS_X, PrA_XS = PrA_XS,
                            pred_Y1 = pred_Y1, pred_Y0 = pred_Y0)
        phi <- est$phi
        phi_var <- est$phi_var

        phi_array_cf[, , k, r] <- cbind(phi, phi[, 1] - phi[, 2])
        phi_se_array_cf[, , k, r] <- sqrt(cbind(phi_var, phi_var[, 1] + phi_var[, 2]))
      }
      if (show_progress) {
        pb$tick()
      }
    }

    # Mean over the K folds within a replication, then median over replications
    phi_array <- apply(apply(phi_array_cf,
                             MARGIN = c(1, 2, 4),
                             FUN = mean),
                       MARGIN = 1:2,
                       FUN = median)
    # NOTE(review): the standard errors are aggregated in the same way, i.e.
    # the per-fold SEs (each based on roughly n / K observations) are averaged.
    # Confirm that this is the intended SE for the fold-averaged estimate.
    phi_se_array <- apply(apply(phi_se_array_cf,
                                MARGIN = c(1, 2, 4),
                                FUN = mean),
                          MARGIN = 1:2,
                          FUN = median)
    # end of ATE_internal with cross-fitting
  } else {
    # start of regular ATE_internal
    ## source model
    source_model_args$Y <- S
    source_model_args$X <- X
    fit_source <- do.call(what = source_model, args = source_model_args)
    PrS_X <- fit_source$pred

    ## treatment model
    PrA_XS <- matrix(nrow = n, ncol = no_S)
    colnames(PrA_XS) <- unique_S
    if (treatment_model_type == "separate") {
      # Named list, so that the fits can be stored by source name without
      # leaving unused NULL entries in the returned object
      fit_treatment <- vector(mode = "list", length = no_S)
      names(fit_treatment) <- unique_S
      for (s in unique_S) {
        id_s <- which(S == s)
        treatment_model_args$Y <- A[id_s]
        treatment_model_args$X <- X[id_s, , drop = FALSE]
        fit_treatment_s <- do.call(what = treatment_model,
                                   args = treatment_model_args)
        PrA_XS[, s] <- predict.SuperLearner(fit_treatment_s, newdata = X)$pred
        fit_treatment[[s]] <- fit_treatment_s
      }
    } else {
      treatment_model_args$Y <- A
      treatment_model_args$X <- data.frame(S_dummy, X)
      fit_treatment <- do.call(what = treatment_model,
                               args = treatment_model_args)
      for (s in unique_S) {
        # Predict as if every observation came from source s
        S_mat <- matrix(0, nrow = n, ncol = no_S - 1)
        colnames(S_mat) <- unique_S[-1]
        if (s %in% unique_S[-1]) S_mat[, s] <- 1
        PrA_XS[, s] <- predict.SuperLearner(fit_treatment,
                                            newdata = data.frame(S_mat, X))$pred
      }
    }

    ## outcome model
    outcome_model_args$Y <- Y
    outcome_model_args$X <- data.frame(A, X)
    fit_outcome <- do.call(what = outcome_model, args = outcome_model_args)
    pred_Y1 <- predict.SuperLearner(fit_outcome,
                                    newdata = data.frame(A = 1, X))$pred
    pred_Y0 <- predict.SuperLearner(fit_outcome,
                                    newdata = data.frame(A = 0, X))$pred

    ## estimators
    est <- estimate_phi(Y_e = Y, A_e = A, S_e = S,
                        PrS_X = PrS_X, PrA_XS = PrA_XS,
                        pred_Y1 = pred_Y1, pred_Y0 = pred_Y0)
    phi <- est$phi
    phi_var <- est$phi_var

    phi_array <- phi_se_array <- array(dim = c(no_S, 3),
                                       dimnames = list(unique_S,
                                                       c("A = 1", "A = 0", "Difference")))
    phi_array[, ] <- cbind(phi, phi[, 1] - phi[, 2])
    phi_se_array[, ] <- sqrt(cbind(phi_var, phi_var[, 1] + phi_var[, 2]))
  }

  # 95% Wald confidence limits
  lb <- phi_array - qnorm(p = 0.975) * phi_se_array
  ub <- phi_array + qnorm(p = 0.975) * phi_se_array

  # One row per source, in the level order of S
  make_df <- function(col) {
    data.frame(Source = unique_S,
               Estimate = phi_array[unique_S, col],
               SE = phi_se_array[unique_S, col],
               ci.lb = lb[unique_S, col],
               ci.ub = ub[unique_S, col],
               row.names = NULL)
  }
  df_dif <- make_df("Difference")
  df_A1 <- make_df("A = 1")
  df_A0 <- make_df("A = 0")

  # The fold-specific fits are not returned when cross-fitting is used
  if (cross_fitting) {
    fit_outcome <- fit_source <- fit_treatment <- NULL
  }

  res <- list(df_dif = df_dif,
              df_A0 = df_A0,
              df_A1 = df_A1,
              fit_outcome = fit_outcome,
              fit_source = fit_source,
              fit_treatment = fit_treatment,
              outcome_model_args = outcome_model_args,
              source_model = source_model,
              treatment_model_args = treatment_model_args)

  res$source_names <- unique_S

  class(res) <- "ATE_internal"

  return(res)
}


