# =============================================================================
# PLATE NORMALIZATION & BOOTSTRAP
# =============================================================================

#' Plate-Level Normalization
#'
#' Normalizes TTT and MP values within each plate against that plate's
#' negative-control (NC) wells. NC wells are the rows whose `Treatment`
#' matches `nc_pattern` (case-insensitive, Perl regex); only finite NC values
#' contribute to the baseline.
#'
#' * `"zscore"`: `(value - median(NC)) / robust_scale(NC)`, applied only when
#'   the scale is finite and strictly positive. `robust_scale()` is a helper
#'   defined elsewhere in the package.
#' * `"median_ratio"`: `value / median(NC)`, applied only when the median is
#'   finite and non-zero.
#'
#' A plate with fewer than two NC wells is skipped with a warning. Rows whose
#' plate identifier is `NA` cannot be assigned to a plate and are left
#' un-normalized (a single warning reports how many). Whenever a baseline is
#' unusable the corresponding normalized values stay `NA`.
#'
#' @param data Data frame with Treatment, TTT, MP columns
#' @param plate_col Name of the column containing plate identifiers
#'   (a single string)
#' @param pc_pattern Regex pattern to identify positive controls. Accepted
#'   for interface compatibility; the normalization itself does not use it.
#' @param nc_pattern Regex pattern to identify negative controls
#' @param method Normalization method: "zscore" or "median_ratio"
#' @return Data frame with added TTT_norm and MP_norm columns, plus a
#'   `plate_id` column holding a copy of the plate identifiers
#' @export
#' @examples
#' \donttest{
#' set.seed(42)
#' df <- data.frame(
#'   Treatment = c(rep("Positive Control", 4), rep("Negative Control", 4),
#'                 rep("Sample_A", 4)),
#'   TTT = c(rnorm(4, 8, 1), rnorm(4, 72, 5), rnorm(4, 12, 3)),
#'   MP = c(rnorm(4, 100, 10), rnorm(4, 20, 5), rnorm(4, 85, 15)),
#'   Plate = rep("Plate1", 12)
#' )
#' normalized <- kwela_plate_normalize(df)
#' }
kwela_plate_normalize <- function(
    data,
    plate_col = "Plate",
    pc_pattern = "\\bPositive\\s*Control\\b|^POS\\b|\\bPC\\b",
    nc_pattern = "\\bNegative\\s*Control\\b|^NEG\\b|\\bNC\\b|\\bTDB\\b|\\bBlank\\b",
    method = c("zscore", "median_ratio")
) {
  method <- match.arg(method)
  df <- as.data.frame(data)

  required <- c("Treatment", "TTT", "MP")
  missing_cols <- setdiff(required, names(df))
  if (length(missing_cols) > 0) {
    stop(sprintf("kwela_plate_normalize: missing columns: %s", paste(missing_cols, collapse = ", ")))
  }
  if (!is.character(plate_col) || length(plate_col) != 1L || is.na(plate_col)) {
    stop("kwela_plate_normalize: 'plate_col' must be a single column name")
  }
  if (!plate_col %in% names(df)) {
    stop(sprintf("Plate column '%s' not found", plate_col))
  }

  df$plate_id <- df[[plate_col]]
  # rep() rather than a bare scalar so that zero-row input is accepted.
  df$TTT_norm <- rep(NA_real_, nrow(df))
  df$MP_norm <- rep(NA_real_, nrow(df))

  # Rows without a plate ID can never match a plate in the loop below; report
  # them once instead of emitting a misleading per-plate NC warning.
  n_missing_plate <- sum(is.na(df$plate_id))
  if (n_missing_plate > 0) {
    warning(sprintf("%d row(s) with missing plate ID left un-normalized", n_missing_plate))
  }

  # Normalizes `values` against the finite entries of `nc_values`.
  # Returns NULL when the NC baseline is unusable for the chosen method.
  normalize_metric <- function(values, nc_values) {
    nc_values <- nc_values[is.finite(nc_values)]
    center <- stats::median(nc_values, na.rm = TRUE)
    if (method == "zscore") {
      spread <- robust_scale(nc_values)
      if (is.finite(center) && is.finite(spread) && spread > 0) {
        return((values - center) / spread)
      }
    } else if (is.finite(center) && abs(center) > .Machine$double.eps) {
      return(values / center)
    }
    NULL
  }

  for (pl in unique(df$plate_id[!is.na(df$plate_id)])) {
    rows <- which(df$plate_id == pl)
    is_nc <- grepl(nc_pattern, df[["Treatment"]][rows], ignore.case = TRUE, perl = TRUE)
    if (sum(is_nc) < 2) {
      warning(sprintf("Plate '%s': <2 NC wells", pl))
      next
    }
    nc_rows <- rows[is_nc]

    ttt_norm <- normalize_metric(df[["TTT"]][rows], df[["TTT"]][nc_rows])
    if (!is.null(ttt_norm)) {
      df$TTT_norm[rows] <- ttt_norm
    }

    mp_norm <- normalize_metric(df[["MP"]][rows], df[["MP"]][nc_rows])
    if (!is.null(mp_norm)) {
      df$MP_norm[rows] <- mp_norm
    }
  }

  df
}


#' Bootstrap Confidence Intervals for Treatment Scores
#'
#' Computes percentile bootstrap confidence intervals for the mean well score
#' and the positive rate of each sample treatment. Only rows with
#' `Type == "Sample"` and a non-missing `Treatment` are used; a well counts as
#' positive when its `classification` is `"POSITIVE"`.
#' For reproducibility, set a seed before calling this function.
#'
#' Treatments with fewer than two wells are reported with their mean score
#' only: the intervals and the positive rate are `NA`.
#'
#' @param result Output from kwela_analyze. Must contain the columns `Type`,
#'   `Treatment`, `well_score` and `classification`.
#' @param B Number of bootstrap replicates (default 1000). With `B = 0` no
#'   resampling is done and the interval bounds are `NA`.
#' @param conf Confidence level in `[0, 1]` (default 0.95)
#' @return Data frame with treatment-level bootstrap statistics including
#'   mean score, score CI, positive rate, and positive rate CI, ordered by
#'   decreasing positive rate and then decreasing mean score. A zero-row data
#'   frame with the same columns is returned when there are no sample rows.
#' @export
#' @examples
#' \donttest{
#' set.seed(42)
#' df <- data.frame(
#'   Treatment = c(rep("Positive Control", 8), rep("Negative Control", 8),
#'                 rep("Sample_A", 8)),
#'   TTT = c(rnorm(8, 8, 1), rnorm(8, 72, 5), rnorm(8, 12, 3)),
#'   MP = c(rnorm(8, 100, 10), rnorm(8, 20, 5), rnorm(8, 85, 15))
#' )
#' result <- kwela_analyze(df, verbose = FALSE)
#' set.seed(123)
#' boot_summary <- kwela_bootstrap_summary(result, B = 500)
#' }
kwela_bootstrap_summary <- function(result, B = 1000, conf = 0.95) {
  required <- c("Type", "Treatment", "well_score", "classification")
  missing_cols <- setdiff(required, names(result))
  if (length(missing_cols) > 0) {
    stop(sprintf("kwela_bootstrap_summary: missing columns: %s",
                 paste(missing_cols, collapse = ", ")))
  }
  if (!is.numeric(B) || length(B) != 1L || !is.finite(B) || B < 0) {
    stop("kwela_bootstrap_summary: 'B' must be a single non-negative number")
  }
  if (!is.numeric(conf) || length(conf) != 1L || is.na(conf) ||
      conf < 0 || conf > 1) {
    stop("kwela_bootstrap_summary: 'conf' must be a single number in [0, 1]")
  }

  alpha <- 1 - conf
  ci_probs <- c(alpha / 2, 1 - alpha / 2)
  pos_levels <- c("POSITIVE")

  # which() drops NA comparisons; a raw logical mask containing NA would
  # insert all-NA phantom rows and inflate the per-treatment well counts.
  keep <- which(result[["Type"]] == "Sample" & !is.na(result[["Treatment"]]))
  samples <- result[keep, , drop = FALSE]

  if (nrow(samples) == 0L) {
    return(data.frame(
      Treatment = samples[["Treatment"]], n_wells = integer(0),
      mean_score = numeric(0),
      score_lo = numeric(0), score_hi = numeric(0),
      positive_rate = numeric(0), pr_lo = numeric(0), pr_hi = numeric(0),
      stringsAsFactors = FALSE))
  }

  # One-row summary for a single treatment.
  summarise_treatment <- function(trt) {
    wells <- samples[which(samples[["Treatment"]] == trt), , drop = FALSE]
    n <- nrow(wells)
    scores <- wells[["well_score"]]
    is_pos <- wells[["classification"]] %in% pos_levels
    mean_score <- mean(scores, na.rm = TRUE)

    # Too few wells to resample: report the point estimate of the score only.
    if (n < 2) {
      return(data.frame(
        Treatment = trt, n_wells = n,
        mean_score = mean_score,
        score_lo = NA_real_, score_hi = NA_real_,
        positive_rate = NA_real_, pr_lo = NA_real_, pr_hi = NA_real_,
        stringsAsFactors = FALSE))
    }

    boot_score <- numeric(B)
    boot_rate <- numeric(B)
    for (b in seq_len(B)) {
      # The same resampled wells feed both statistics within a replicate.
      bi <- sample(n, n, replace = TRUE)
      boot_score[b] <- mean(scores[bi], na.rm = TRUE)
      boot_rate[b] <- sum(is_pos[bi]) / n
    }

    score_ci <- stats::quantile(boot_score, ci_probs, na.rm = TRUE, names = FALSE)
    rate_ci <- stats::quantile(boot_rate, ci_probs, na.rm = TRUE, names = FALSE)

    data.frame(
      Treatment = trt, n_wells = n,
      mean_score = mean_score,
      score_lo = as.numeric(score_ci[1]),
      score_hi = as.numeric(score_ci[2]),
      positive_rate = sum(is_pos) / n,
      pr_lo = as.numeric(rate_ci[1]),
      pr_hi = as.numeric(rate_ci[2]),
      stringsAsFactors = FALSE)
  }

  summaries <- lapply(unique(samples[["Treatment"]]), summarise_treatment)
  result_df <- do.call(rbind, summaries)
  # NA positive rates (single-well treatments) sort last.
  result_df[order(-result_df$positive_rate, -result_df$mean_score), , drop = FALSE]
}
