#' @title Loads diagnoses into R.
#' @export
#'
#' @description Loads diagnoses information into the R environment.
#'
#' @param file string, full file path to Dia.txt.
#' @param merge_id string, column name to use to create \emph{ID_MERGE} column used to merge different datasets. Defaults to \emph{EMPI},
#' the unique Partners-wide patient identifier in the RPDR system.
#' @param sep string, divider between hospital ID and MRN. Defaults to \emph{:}.
#' @param id_length string, indicating whether to modify MRN length based-on required values \emph{id_length = standard}, or to keep lengths as is \emph{id_length = asis}.
#' If \emph{id_length = standard} then in case of \emph{MGH, BWH, MCL, EMPI and PMRN} the length of the MRNs are corrected accordingly by adding zeros, or removing numeral from the beginning.
#' In other cases the lengths are unchanged. Defaults to \emph{standard}.
#' @param perc numeric, a number between 0-1 indicating which parsed ID columns to keep. Data present in \emph{perc x 100\%} of patients are kept. Defaults to \emph{0.6}.
#' @param na boolean, whether to remove columns with only NA values. Defaults to \emph{TRUE}.
#' @param identical boolean, whether to remove columns with identical values. Defaults to \emph{TRUE}.
#' @param nThread integer, number of threads to use by data.table for reading data. Defaults to \emph{4}.
#' @param mrn_type boolean, should data in \emph{MRN_Type} and \emph{MRN} be parsed. Defaults to \emph{FALSE}, as it is not advised to parse these for all data sources as it takes considerable time.
#'
#' @return data table, with diagnoses information.
#' \describe{
#'  \item{ID_MERGE}{numeric, defined IDs by \emph{merge_id}, used for merging later.}
#'  \item{ID_dia_EMPI}{string, Unique Partners-wide identifier assigned to the patient used to consolidate patient information
#'  from \emph{dia} datasource, corresponds to EMPI in RPDR. Data is formatted using pretty_mrn().}
#'  \item{ID_dia_PMRN}{string, Epic medical record number. This value is unique across Epic instances within the Partners network
#'  from \emph{dia} datasource, corresponds to EPIC_PMRN in RPDR. Data is formatted using pretty_mrn().}
#'  \item{ID_dia_loc}{string, if mrn_type == TRUE, then the data in \emph{MRN_Type} and \emph{MRN} are parsed into IDs corresponding to locations \emph{(loc)}. Data is formatted using pretty_mrn().}
#'  \item{time_dia}{POSIXct, Date when the diagnosis was noted, corresponds to Date in RPDR. Converted to POSIXct format.}
#'  \item{dia_name}{string, Name of the diagnosis, diagnosis-related group, or phenotype. For more information on available
#'  Phenotypes visit https://phenotypes.partners.org/phenotype_list.html, corresponds to Diagnosis_Name in RPDR.}
#'  \item{dia_code}{string, Diagnosis, diagnosis-related group, or phenotype code, corresponds to Code in RPDR.}
#'  \item{dia_code_type}{string, Standardized classification system or custom grouping associated with the diagnosis code, corresponds to Code_type in RPDR.}
#'  \item{dia_flag}{string, Qualifier for the diagnosis, if any, corresponds to Diagnosis_flag in RPDR.}
#'  \item{dia_enc_num}{string, Unique identifier of the record/visit. This value includes the source system, hospital, and a unique identifier within the source system, corresponds to Encounter_number in RPDR.}
#'  \item{dia_provider}{string, Provider of record for the encounter where the diagnosis was entered, corresponds to Provider in RPDR. Punctuation marks are removed.}
#'  \item{dia_clinic}{string, Specific department/location where the patient encounter took place, corresponds to Clinic in RPDR. Punctuation marks are removed.}
#'  \item{dia_hosp}{string, Facility where the encounter occurred, corresponds to Hospital in RPDR.}
#'  \item{dia_inpatient}{string, Identifies whether the diagnosis was noted during an inpatient or outpatient encounter, corresponds to Inpatient_Outpatient in RPDR. Punctuation marks are removed.}
#'  }
#'
#' @encoding UTF-8
#'
#' @examples \dontrun{
#' #Using defaults
#' d_dia <- load_dia(file = "test_Dia.txt")
#'
#' #Use sequential processing
#' d_dia <- load_dia(file = "test_Dia.txt", nThread = 1)
#'
#' #Use parallel processing and parse data in MRN_Type and MRN columns and keep all IDs
#' d_dia <- load_dia(file = "test_Dia.txt", nThread = 20, mrn_type = TRUE, perc = 1)
#' }

load_dia <- function(file, merge_id = "EMPI", sep = ":", id_length = "standard", perc = 0.6, na = TRUE, identical = TRUE, nThread = 4, mrn_type = FALSE) {

  # Read the file and create the ID columns; all arguments are forwarded unchanged to load_base()
  # with the data source fixed to "dia".
  DATA <- load_base(file = file, merge_id = merge_id, sep = sep, id_length = id_length, perc = perc, na = na, identical = identical, nThread = nThread, mrn_type = mrn_type, src = "dia")

  # The first column named EMPI or IncomingId marks where the raw columns begin:
  # everything before it is kept as the output ID columns, everything from it onwards is the raw input.
  raw_id <- which(colnames(DATA) %in% c("EMPI", "IncomingId"))[1]
  if (is.na(raw_id)) {
    # Without this guard the column ranges below fail with an uninformative "NA/NaN argument" error.
    stop("Neither 'EMPI' nor 'IncomingId' column was found in the data returned by load_base().", call. = FALSE)
  }

  # The ranges are written inline on purpose so that column selection behaves the same as before
  # for the table type returned by load_base().
  data_raw <- DATA[, raw_id:dim(DATA)[2]]
  DATA     <- DATA[, 1:(raw_id-1)]

  #Add additional information
  # NOTE(review): tz = "est" is kept as is for backward compatibility; lower-case "est" may not be
  # recognised as a time zone name on every platform - confirm the intended zone.
  DATA$time_dia       <- as.POSIXct(data_raw$Date, format = "%m/%d/%Y", tz = "est")

  # Fields where punctuation is kept (remove_punc = FALSE)
  DATA$dia_name       <- pretty_text(data_raw$Diagnosis_Name, remove_after = FALSE, remove_punc = FALSE, remove_white = FALSE)
  DATA$dia_code       <- pretty_text(data_raw$Code, remove_after = FALSE, remove_punc = FALSE, remove_white = FALSE)
  DATA$dia_code_type  <- pretty_text(data_raw$Code_Type, remove_after = FALSE, remove_punc = FALSE, remove_white = FALSE)
  DATA$dia_flag       <- pretty_text(data_raw$Diagnosis_Flag, remove_after = FALSE, remove_punc = FALSE, remove_white = FALSE)
  # The encounter identifier is stored in Encounter_number; Accession_Number is not a diagnosis column,
  # so reading it returned NULL and the dia_enc_num column was silently lost.
  DATA$dia_enc_num    <- pretty_text(data_raw$Encounter_number, remove_after = FALSE, remove_punc = FALSE, remove_white = FALSE)
  DATA$dia_hosp       <- pretty_text(data_raw$Hospital, remove_after = FALSE, remove_punc = FALSE, remove_white = FALSE)

  # Fields using pretty_text()'s own remove_punc setting (documented above as "Punctuation marks are removed")
  DATA$dia_provider   <- pretty_text(data_raw$Provider, remove_after = FALSE, remove_white = FALSE)
  DATA$dia_clinic     <- pretty_text(data_raw$Clinic, remove_after = FALSE, remove_white = FALSE)
  DATA$dia_inpatient  <- pretty_text(data_raw$Inpatient_Outpatient, remove_after = FALSE, remove_white = FALSE)

  # Optionally drop columns that are all NA and/or identical to another column, as requested by the caller.
  DATA <- remove_column(dt = DATA, na = na, identical = identical)

  return(DATA)
}
