# =============================================================================
# MODULE 3: GEOMETRIC DISTANCE METRICS
# =============================================================================

#' Energy Distance (U-statistic form)
#'
#' Two-sample energy distance between numeric vectors, computed as
#' `2 * E|X - Y| - E|X - X'| - E|Y - Y'|`. The within-sample terms average
#' over distinct pairs only (U-statistic form), so the result can be slightly
#' negative when the two samples are very similar.
#'
#' Non-finite values are dropped first. When `max_n` is given and a sample is
#' longer than `max_n`, that sample is replaced by `max_n` evenly spaced
#' empirical quantiles (deterministic, no RNG dependency).
#'
#' @param x Numeric vector of first sample
#' @param y Numeric vector of second sample
#' @param max_n Maximum sample size before subsampling (NULL = no limit).
#'   Only applied when finite and greater than 2.
#' @return Energy distance, or NA_real_ if either sample has fewer than two
#'   finite values
#' @export
#' @references
#' Szekely GJ, Rizzo ML (2013). Energy statistics: A class of statistics based
#' on distances. Journal of Statistical Planning and Inference 143(8):1249-72.
#' @examples
#' energy_distance(rnorm(30), rnorm(30, 2))
energy_distance <- function(x, y, max_n = NULL) {
  # Replace an over-long sample by `cap` evenly spaced empirical quantiles
  thin <- function(v, cap) {
    if (length(v) <= cap) {
      return(v)
    }
    grid <- seq(0, 1, length.out = cap)
    as.numeric(stats::quantile(v, probs = grid, names = FALSE))
  }

  # Mean absolute difference over all distinct pairs of one sample
  mean_within <- function(v) mean(as.numeric(stats::dist(v)))

  x <- x[is.finite(x)]
  y <- y[is.finite(y)]
  if (min(length(x), length(y)) < 2) {
    return(NA_real_)
  }

  cap_active <- !is.null(max_n) && is.finite(max_n) && max_n > 2
  if (cap_active) {
    x <- thin(x, max_n)
    y <- thin(y, max_n)
  }

  # Mean absolute difference over every (x, y) pair
  between <- mean(abs(outer(x, y, "-")))
  2 * between - mean_within(x) - mean_within(y)
}

#' Wasserstein Distance (1D, p=1)
#'
#' Optimal transport distance between two univariate samples, computed as the
#' mean absolute difference between their empirical quantile functions. Both
#' quantile functions are evaluated on the same grid of evenly spaced
#' probabilities in [0, 1]; the grid has as many points as the larger sample.
#' Non-finite values are dropped before the comparison.
#'
#' @param x First distribution (numeric vector)
#' @param y Second distribution (numeric vector)
#' @return Wasserstein distance, or NA_real_ if either sample has fewer than
#'   two finite values
#' @export
#' @examples
#' wasserstein_1d(rnorm(50), rnorm(50, 2))
wasserstein_1d <- function(x, y) {
  x <- x[is.finite(x)]
  y <- y[is.finite(y)]
  if (min(length(x), length(y)) < 2) {
    return(NA_real_)
  }

  # Shared probability grid sized by the larger of the two samples
  grid <- seq(0, 1, length.out = max(length(x), length(y)))
  quantiles_on_grid <- function(v) {
    as.numeric(stats::quantile(v, probs = grid, names = FALSE))
  }

  mean(abs(quantiles_on_grid(x) - quantiles_on_grid(y)))
}

#' Log-Space Euclidean Distance
#'
#' For multiplicative noise processes (RT-QuIC fluorescence). Each sample is
#' log-transformed and summarised by its mean and its spread (as returned by
#' `safe_sd()`); the result is the Euclidean distance between those two
#' summary pairs. Values that are non-finite or not strictly positive are
#' dropped before taking logs.
#'
#' @param x First distribution (positive values)
#' @param y Second distribution (positive values)
#' @return Log-space Euclidean distance, or NA_real_ if either sample has
#'   fewer than two usable values
#' @export
#' @examples
#' log_euclidean_distance(rlnorm(30), rlnorm(30, 1))
log_euclidean_distance <- function(x, y) {
  # Only finite, strictly positive values have a usable logarithm
  keep_positive <- function(v) v[is.finite(v) & v > 0]

  x <- keep_positive(x)
  y <- keep_positive(y)
  if (min(length(x), length(y)) < 2) {
    return(NA_real_)
  }

  lx <- log(x)
  ly <- log(y)

  location_gap <- mean(lx, na.rm = TRUE) - mean(ly, na.rm = TRUE)
  # safe_sd() is defined elsewhere; presumably a guarded standard deviation
  spread_gap <- safe_sd(lx) - safe_sd(ly)

  sqrt(sum(location_gap^2 + spread_gap^2))
}

#' Mahalanobis Distance in Parameter Space
#'
#' Distance of `sample_params` from `reference_mean`, scaled by the reference
#' covariance. A ridge of 1e-6 is added to the covariance diagonal before
#' solving so that near-singular matrices remain invertible.
#'
#' The two parameter vectors are subtracted positionally; names are not used
#' to align them.
#'
#' @param sample_params Named numeric vector of sample parameters
#' @param reference_mean Named numeric vector of reference means
#' @param reference_cov Covariance matrix of reference distribution
#' @return Mahalanobis distance as a length-one numeric vector (no matrix
#'   dimensions), or NA_real_ if an input vector contains NA, the linear solve
#'   fails, or the quadratic form is negative or not a number
#' @export
#' @examples
#' mahalanobis_distance(c(1, 2), c(0, 0), diag(2))
mahalanobis_distance <- function(sample_params, reference_mean, reference_cov) {
  if (anyNA(sample_params) || anyNA(reference_mean)) {
    return(NA_real_)
  }
  tryCatch({
    delta <- as.numeric(sample_params - reference_mean)
    ridged_cov <- reference_cov + diag(1e-6, length(delta))
    # Solve the linear system directly rather than forming the explicit
    # inverse; sum() also collapses the quadratic form to a plain scalar.
    quad_form <- sum(delta * solve(ridged_cov, delta))
    # A non-positive-semidefinite covariance can make the quadratic form
    # negative; report that as a failure instead of NaN plus a warning.
    if (is.na(quad_form) || quad_form < 0) NA_real_ else sqrt(quad_form)
  }, error = function(e) NA_real_)
}
