
# ----------------------------------- #
# Generation of Censored SMSN Samples #
# ----------------------------------- #

rSMSN <- function(n, mu, sigma2, lambda = 0, nu = NULL, family = "SN") {
  if (family %in% c("SN", "ST", "SCN")) {
    deltinha <- lambda / sqrt(1 + lambda^2)
    Delta <- sqrt(sigma2) * deltinha
    tau <- sigma2 * (1 - deltinha^2)

    if (family == "SN") {
      u <- rep(1, n)
    } else if (family == "ST") {
      u <- rgamma(n, shape = nu / 2, rate = nu / 2)
    } else if (family == "SCN") {
      p <- runif(n)
      u <- rep(1, n)
      u[p < nu[1]] <- nu[2]
    }

    T0 <- rnorm(n)
    T1 <- rnorm(n)
    T2 <- abs(T0) / sqrt(u)
    y <- mu + Delta * T2 + sqrt(tau / u) * T1
  }

  if (family == "N") {
    y <- rnorm(n, mean = mu, sd = sqrt(sigma2))
  }

  if (family == "T") {
    y <- mu + sqrt(sigma2) * rt(n, df = nu)
  }

  if (family == "CN") {
    p <- runif(n)
    u <- rep(1, n)
    u[p < nu[1]] <- nu[2]

    T1 <- rnorm(n)

    y <- mu + sqrt(sigma2 / u) * T1
  }

  return(y)
}


#' Generate simulated censored data under heavy‑tailed Distributions
#'
#' Simulates a univariate linear regression dataset with censoring  and/or missing values in the response variable, considering that the error follows a SMSN distribution.
#'
#' @param n Integer. Sample size to be generated.
#' @param x Numeric matrix of covariates (dimension \code{n x p}). Not contain missing values.
#' @param beta Numeric vector of regression coefficients of length \code{p}.
#' @param sigma2 Positive numeric scalar. Scale parameter of SMSN class.
#' @param lambda Numeric scalar. Shape parameter that controls the skewness in the SMSN class. Ignored when \code{family = "N"}, \code{"T"} or \code{"CN"}.
#' @param nu Distribution-specific parameter: for \code{"ST"} or \code{"T"}, \code{nu} is a scalar > 2 (degrees of freedom); for \code{"SCN"} or \code{"CN"}, a vector \code{(nu1, nu2)} with values in (0,1). Ignored for \code{"SN"} and \code{"N"}.
#' @param cens Character string indicating the type of censoring: \code{"Left"}, \code{"Right"} or \code{"Int"}. Default is \code{"Int"}.
#' @param pcens Proportion of censored observations. Must be between 0 and 1. Default is \code{0}.
#' @param pna Proportion of missing values (treated as extreme interval censoring). Must be between 0 and 1. Only allowed when \code{cens = "Int"}. Default is \code{0}.
#' @param family Character string indicating the error distribution family. Possible values: \code{"SN"} (Skew-Normal), \code{"ST"} (Skew-t), \code{"SCN"} (Skew Contaminated Normal), \code{"N"} (Normal), \code{"T"} (Student-t) and \code{"CN"} (Contaminated Normal). Default is \code{"ST"}.
#'
#' @details
#' The following procedures are applied to the generated response variable with incomplete observation:
#'
#' - **Left censoring**: values below a cutoff point (defined based on the \code{pcens}) are replaced by that cutoff, indicating that the true value is less than or equal to it.
#' - **Right censoring**: values above a cutoff point (also based on the \code{pcens}) are replaced by that value, indicating that the true value is greater than or equal to it.
#' - **Interval censoring**: a subset of observations is randomly selected (based on the \code{pcens}), and each value is replaced by an interval centered at the true value.
#' - **Missing data**: an additional subset of observations (defined based on the \code{pna}) is replaced by unbounded intervals of the form \code{(-Inf, Inf)}, representing complete uncertainty about the true value.
#'
#' @return A list with the following components:
#' \item{y}{Fully observed response values (uncensored).}
#' \item{yc}{Incomplete response values.}
#' \item{cc}{Censoring indicator. \code{0} for observed data and \code{1} for censored or missing case.}
#' \item{UL}{Vector of upper limits of the censoring interval. Equal to \code{NULL} for left or right censoring. For missing data, equal to \code{Inf}.}
#'
#' @export
#'
#' @examples
#' set.seed(1997)
#'
#' # Generate covariates and true parameter values
#' n      <- 500
#' x      <- cbind(1, rnorm(n))
#' beta   <- c(2, -1)
#' sigma2 <- 1
#' lambda <- 3
#' nu     <- 3
#'
#' # Generate a simulated dataset under SMSN-ICR model, with interval censoring and/or missing values
#' sample <- gen_SMSNCens_sample(n = n, x = x, beta = beta, sigma2 = sigma2,
#'                          lambda = lambda, nu = nu, cens = "Int",
#'                          pcens = 0.1, pna = 0.05, family = "ST")
#'
#' # Fit the SMSN-ICR model using the generated data
#' fit <- CensRegSMSN(sample$cc, x, sample$yc, cens = "Int", UL = sample$UL, get.init = TRUE,
#'                    show.envelope = TRUE, family = "ST")
gen_SMSNCens_sample <- function(n, x, beta, sigma2, lambda, nu, cens = "Int", pcens = 0, pna = 0, family = "ST") {

  if (length(n) == 0 )
    stop("The argument 'n' must be provided.")
  if( !is.numeric(n) || length(n) != 1 || n <= 0 || n != as.integer(n))
    stop("The argument 'n' must be a positive integer.")

  if (length(x) == 0)
    stop("The argument 'x' must be provided.")
  if (!is.matrix(x) || !is.numeric(x))
    stop("The argument 'x' must be a numeric matrix.")
  if (any(is.na(x)))
    stop("The matrix 'x' must not contain NA values.")
  if (nrow(x) != n)
    stop("The number of rows in 'x' must be equal to 'n'.")

  if (length(beta) == 0 || !is.numeric(beta))
    stop("The argument 'beta' must be provided and must be numeric.")

  p <- ncol(x)
  if (length(beta) != p)
    stop("'beta' must be a numeric vector with length equal to the number of columns in 'x'.")

  if (length(sigma2) == 0)
    stop("The argument 'sigma2' must be provided.")
  if(length(sigma2) != 1 || !is.numeric(sigma2) || sigma2 <= 0)
    stop("The argument 'sigma2' must be a positive numeric scalar.")

  if (length(lambda) == 0)
    stop("The argument 'lambda' must be provided.")
  if (!is.numeric(lambda) || length(lambda) != 1)
    stop("The argument 'lambda' must be a numeric scalar.")

  if (!(family %in% c("ST", "T", "SN", "N", "SCN", "CN")))
    stop(paste("Family", family, "not recognized."))

  if (family %in% c("T", "ST", "CN", "SCN")) {
    if (length(nu) == 0)
      stop(sprintf("When 'family = \"%s\"', the argument 'nu' must be provided.", family))
    if (!is.numeric(nu))
      stop("The argument 'nu' must be numeric.")
  }
  if (family %in% c("T", "ST")) {
    if (length(nu) != 1)
      stop("The argument 'nu' must be a scalar for the T or ST distribution.")
    if (nu <= 2)
      stop("The argument 'nu' must be greater than 2 for the T or ST distribution.")
  }
  if (family %in% c("CN", "SCN")) {
    if (length(nu) != 2)
      stop("The argument 'nu' must be a bidimensional vector for the CN or SCN distribution.")
    if (any(nu <= 0 | nu >= 1))
      stop("Both elements of 'nu' must lie in the interval (0, 1) for the CN or SCN distribution.")
  }

  if (!(cens %in% c("Right", "Left", "Int")))
    stop("Invalid censoring type. The argument 'cens' must be either 'Right', 'Left' or 'Int'.")

  if (!is.numeric(pcens) || pcens < 0 || pcens > 1)
    stop("The argument 'pcens' must be a numeric value between 0 and 1.")
  if (!is.numeric(pna) || pna < 0 || pna > 1)
    stop("The argument 'pna' must be a numeric value between 0 and 1.")
  if ((pcens + pna) > 1)
    stop("The sum of 'pcens' and 'pna' must not exceed 1.")
  if (pna > 0 && cens != "Int") {
    stop("The argument 'pna' (proportion of missing data) is only allowed when 'cens = \"Int\"'.")
  }

  if (family %in% c("SN", "ST", "SCN")) {
    deltinha <- lambda / sqrt(1 + lambda^2)
    Delta <- sqrt(sigma2) * deltinha

    if (family == "SN") {
      eta <- -sqrt(2 / pi)
    } else if (family == "ST") {
      k1 <- sqrt(nu / 2) * gamma((nu - 1) / 2) / gamma(nu / 2)
      eta <- -sqrt(2 / pi) * k1
    } else if (family == "SCN") {
      k1 <- (nu[1] / sqrt(nu[2])) + 1 - nu[1]
      eta <- -sqrt(2 / pi) * k1
    }
    mu <- eta * Delta
  } else {
    mu <- 0
  }

  error <- rSMSN(n = n, mu = mu, sigma2 = sigma2, lambda = lambda, nu = nu, family = family)
  y <- as.vector(x %*% beta) + error
  yc <- y

  cc <- rep(0, n)
  UL <- rep(0, n)

  if (pcens > 0) {
    if (cens == "Left") {
      cutoff <- quantile(yc, probs = pcens)
      cc <- as.numeric(yc <= cutoff)
      yc[cc == 1] <- cutoff
      UL <- NULL
    }

    if (cens == "Right") {
      cutoff <- quantile(yc, probs = 1 - pcens)
      cc <- as.numeric(yc >= cutoff)
      yc[cc == 1] <- cutoff
      UL <- NULL
    }

    if (cens == "Int") {
      UL           <- yc
      cc           <- rep(0, n)
      n_cens       <- ceiling(pcens * n)
      idx_cens     <- sample(1:n, n_cens)
      cc[idx_cens] <- 1

      sd_cens      <- sd(yc[idx_cens])
      UL[idx_cens] <- yc[idx_cens] + sd_cens
      yc[idx_cens] <- yc[idx_cens] - sd_cens

      idx_obs      <- setdiff(1:n, idx_cens)
      n_miss       <- ceiling(pna * n)
      idx_md       <- sample(idx_obs, n_miss)

      yc[idx_md]   <- -Inf
      UL[idx_md]   <- Inf
      cc[idx_md]   <- 1
    }
  }

  return(list(y = y, yc = yc, cc = cc, UL = UL))
}
