#' Check experiment data.
#'
#' Checks that the experiment description is well-formed and complete.
#'
#' Information about a full experiment can be assembled into a spreadsheet
#' (currently Excel, CSV and tab-delimited text formats are supported) and used
#' to process large numbers of files in one batch. This function checks the
#' spreadsheet to make sure that it is properly formed and that all the data
#' files referred to are present.
#'
#' The function can (and ideally should) be run with the same parameters as will
#' be used to call \code{\link{read_experiment}}, although many of the
#' parameters are not required for the check.
#'
#' The content of the spreadsheet, the presence and the content of any
#' supporting files are also checked. Checks do not cover validity of the raw
#' data, so it is still possible to have invalid data even if
#' \code{\link{check_experiment}} returns \code{TRUE} (although this suggests an
#' underlying problem with the raw data). Warning and error messages are
#' intended to be useful and help any format issues be quickly resolved.
#'
#' @param filename A spreadsheet file containing a description of the experiment
#'   or a JSON file containing an exported experiment archive.
#' @param format An experiment description for reading raw data can be provided
#'   as an Excel spreadsheet ("excel", "xls" or "xlsx"), as a comma-delimited
#'   ("csv") or as a tab-delimited ("tab", "tsv", "txt" or "text") text file.
#'   The value "json" indicates that the file is an archived experiment in the
#'   JSON format (as generated by \code{\link{export_json}}). Default
#'   (\code{NA}) is to guess the format from the file extension.
#' @param interpolate Ignored. For compatibility with
#'   \code{\link{read_experiment}}.
#' @param project.dir A directory path specifying where the files needed for
#'   processing the experiment are stored. Ignored if \code{format = "json"}.
#' @param data.dir A directory path specifying where the raw data are stored.
#'   This is a folder root and all track file paths specified in the
#'   spreadsheet are interpreted relative to it. Defaults to
#'   \code{project.dir}. Ignored if \code{format = "json"}.
#' @param cluster Ignored. For compatibility with \code{\link{read_experiment}}.
#' @param author.note Ignored. For compatibility with
#'   \code{\link{read_experiment}}.
#' @param verbose Ignored. For compatibility with \code{\link{read_experiment}}.
#'
#' @return Invisibly returns \code{TRUE} for a successful check or \code{FALSE}
#'   otherwise.
#'
#' @seealso \code{\link{read_experiment}}, \code{\link{export_json}}.
#'
#' @examples
#' require(Rtrack)
#' experiment.description <- system.file("extdata", "Minimal_experiment.xlsx",
#'   package = "Rtrack")
#' check_experiment(experiment.description)
#'
#' @importFrom readxl read_excel
#' @importFrom utils read.csv read.table
#' @importFrom stats na.omit
#' @importFrom crayon bold red green
#' @importFrom rjson fromJSON
#'
#' @export
check_experiment = function(filename, format = NA, interpolate = FALSE, project.dir = NA, data.dir = project.dir, cluster = NULL, author.note = "", verbose = FALSE){
	# Validates an experiment description (spreadsheet or JSON archive) together
	# with the arena and track files it refers to. Every problem found is
	# reported as a warning; the overall result (TRUE when no problem was found)
	# is returned invisibly.
	#
	# 'interpolate', 'cluster', 'author.note' and 'verbose' are accepted but not
	# used by the check.

	# Both directories default to the folder containing the experiment file.
	# 'data.dir' defaults to 'project.dir', so 'project.dir' is resolved first.
	# An empty string is treated as "not specified".
	if(is.na(project.dir) || !nzchar(project.dir)) project.dir = dirname(filename)
	if(is.na(data.dir) || !nzchar(data.dir)) data.dir = dirname(filename)
	# File paths are built below by simple concatenation, so a trailing slash is required.
	if(!endsWith(project.dir, "/")) project.dir = paste0(project.dir, "/")
	if(!endsWith(data.dir, "/")) data.dir = paste0(data.dir, "/")

	# The result flag must be initialised before any check can clear it.
	check.pass = TRUE
	experiment.data = NULL

	format = tolower(format)
	if(is.na(format)){
		# Guess the format from the (case-insensitive) file extension.
		extension = tolower(tools::file_ext(filename))
		if(extension %in% c("json")){
			format = "json"
		}else if(extension %in% c("xls", "xlsx")){
			format = "excel"
		}else if(extension %in% c("csv")){
			format = "csv"
		}else if(extension %in% c("tab", "tsv", "txt")){
			format = "tab"
		}else{
			# 'format' stays NA: both format-specific sections below are skipped.
			warning("The file format cannot be established automatically. Please specify the 'format' parameter.")
			check.pass = FALSE
		}
	}

	# TRUE if 'container' has no usable value stored under 'field'.
	is.missing = function(container, field){
		if(!is.list(container)) return(TRUE)
		value = container[[field]]
		is.null(value) || length(value) == 0 || identical(value, "")
	}

	if(!is.na(format) && format == "json"){
		if(!file.exists(filename)){
			warning(paste0("The experiment archive file ('", filename, "') does not exist!"))
			check.pass = FALSE
		}else{
			archive = tryCatch(
				rjson::fromJSON(file = filename, simplify = FALSE),
				error = function(e) NULL
			)
			# The archive must hold the experiment information (element 1) and the track data (element 2).
			if(!is.list(archive) || length(archive) < 2){
				warning(paste0("The file '",  filename, "' is not properly formed."))
				check.pass = FALSE
			}else{
				experiment.data = archive[[2]]

				# Run check for required features
				required.fields = c("id", "target", "day", "trial", "arena_name", "raw.t", "raw.x", "raw.y", "t", "x", "y", "arena")
				for(field in required.fields){
					field.check = !vapply(experiment.data, is.missing, logical(1), field = field)
					if(!all(field.check)){
						if(length(which(!field.check)) > 1){
							warning(paste0("The experiment is missing the field '", field, "' for elements: ", paste(which(!field.check), collapse = "', '"), "."))
						}else{
							warning(paste0("The experiment is missing the field '", field, "' for element ", which(!field.check), "."))
						}
						check.pass = FALSE
					}
				}
				required.arena.fields = c("type", "trial.length", "arena.bounds") # goal and old.goal are not actually required
				arenas = lapply(experiment.data, function(element) if(is.list(element)) element[["arena"]] else NULL)
				for(field in required.arena.fields){
					field.check = !vapply(arenas, is.missing, logical(1), field = field)
					if(!all(field.check)){
						if(length(which(!field.check)) > 1){
							warning(paste0("The experiment is missing the field 'arena$", field, "' for elements: ", paste(which(!field.check), collapse = "', '"), "."))
						}else{
							warning(paste0("The experiment is missing the field 'arena$", field, "' for element ", which(!field.check), "."))
						}
						check.pass = FALSE
					}
				}
			}
		}
	}else if(!is.na(format)){
		# Any format other than JSON, Excel or CSV is assumed to be tab-delimited text.
		if(!file.exists(filename)){
			warning(paste0("The experiment description file ('", filename, "') does not exist!"))
			check.pass = FALSE
		}else{
			if(format %in% c("xls", "xlsx", "excel")){
				experiment.data = tryCatch(
					suppressMessages(as.data.frame(readxl::read_excel(filename, col_types = "text"), stringsAsFactors = FALSE)),
					error = function(e) NULL
				)
			}else if(format == "csv"){
				experiment.data = tryCatch(
					utils::read.csv(filename, stringsAsFactors = FALSE, check.names = FALSE),
					error = function(e) NULL
				)
			}else{
				# For tab-delimited input a reader warning is also treated as a malformed file.
				experiment.data = tryCatch(
					utils::read.delim(filename, stringsAsFactors = FALSE, check.names = FALSE),
					error = function(e) NULL,
					warning = function(w) NULL
				)
			}
			if(is.null(experiment.data)){
				warning(paste0("The file '",  filename, "' is not properly formed."))
				check.pass = FALSE
			}
		}

		# A NULL 'experiment.data' has already been reported above.
		if(!is.null(experiment.data)){
			# Run check for required features
			required.fields = c("_TrackID", "_TargetID", "_Day", "_Trial", "_Arena", "_TrackFile", "_TrackFileFormat")
			field.check = required.fields %in% colnames(experiment.data)
			if(!all(field.check)){
				if(length(which(!field.check)) > 1){
					warning(paste0("The experiment description is missing the required columns: ", paste(required.fields[!field.check], collapse = "', '"), "."))
				}else{
					warning(paste0("The experiment description is missing the required column ", paste(required.fields[!field.check], collapse = "', '"), "."))
				}
				check.pass = FALSE
			} # end field.check
			optional.fields = c("_TrackIndex") # Valid but not always required
			# Check remaining columns for valid names (compatible with JSON schema)
			user.fields = colnames(experiment.data)[!(colnames(experiment.data) %in% c(required.fields, optional.fields))]
			user.field.check = grepl("^[a-zA-Z][a-zA-Z0-9_]*$", user.fields) & !duplicated(user.fields)
			if(!all(user.field.check)){
				user.field.suggestions = gsub("^([0-9])", "X\\1", gsub("([^a-zA-Z0-9_])", "_", make.unique(user.fields)))[!user.field.check]
				if(length(which(!user.field.check)) > 1){
					warning(paste0("The following column names are not valid: ", paste0("'", user.fields[!user.field.check], "'", collapse = ", "), ". Consider replacing them with: ", paste0("'", user.field.suggestions, "'", collapse = ", "), "."))
				}else{
					warning(paste0("The following column name '", user.fields[!user.field.check], "' is not valid. Consider replacing it with '", user.field.suggestions, "'."))
				}
				check.pass = FALSE
			}

			if(!file.exists(project.dir)){
				warning(paste0("The project directory '", project.dir, "'does not exist."))
				check.pass = FALSE
			}else if("_Arena" %in% colnames(experiment.data)){ # A missing column has already been reported.
				arena.names = as.character(stats::na.omit(unique(experiment.data[["_Arena"]])))
				# Guard the empty case: paste0() with a zero-length vector would return the bare directory.
				arena.paths = if(length(arena.names) > 0) paste0(project.dir, arena.names) else character(0)
				missing.arenas = arena.paths[!file.exists(arena.paths)]
				if(length(missing.arenas) > 1){
					warning(paste0("The following arena description files cannot be found: ", paste(paste0("'", missing.arenas, "'"), collapse = ", "), "."))
					check.pass = FALSE
				}else if(length(missing.arenas) == 1){
					warning(paste0("The arena description file '", missing.arenas, "' does not exist."))
					check.pass = FALSE
				}else{
					# Check the arena files
					for(arena.name in arena.names){
						# Arena files hold 'key = value' lines; transpose so that each key becomes a column.
						description = tryCatch(
							as.data.frame(t(utils::read.delim(paste0(project.dir, arena.name), header = FALSE, sep = "=", strip.white = TRUE, comment.char = "#", stringsAsFactors = FALSE, row.names = 1)), stringsAsFactors = FALSE),
							error = function(e) NULL
						)
						if(is.null(description)){
							warning(paste0("There was a problem reading the arena description file '", arena.name, "'."))
							check.pass = FALSE
							next
						}
						arena.type = description[["type"]]
						# isTRUE() also covers a missing or NA type.
						if(isTRUE(arena.type == "mwm")){
							trial.length = suppressWarnings(as.numeric(description[["trial.length"]]))
							if(!(length(trial.length) == 1 && !is.na(trial.length))){
								warning(paste0("In '", arena.name, "', the trial length is missing or is not a number."))
								check.pass = FALSE
							}

							# A zone is 'circle x y radius' or 'square' followed by four numbers.
							zones = intersect(c("arena.bounds", "goal", "old.goal"), colnames(description))
							zones.check = vapply(zones, function(zone){
								bounds = unlist(strsplit(as.character(description[[zone]]), "\\s+"))
								expected.length = if(identical(bounds[1], "circle")) 4 else if(identical(bounds[1], "square")) 5 else NA
								if(is.na(expected.length) || length(bounds) != expected.length) return(FALSE)
								!anyNA(suppressWarnings(as.numeric(bounds[-1])))
							}, logical(1))
							if(!all(zones.check)){
								if(length(which(!zones.check)) > 1){
									warning(paste0("In '", arena.name, "', the zone descriptions for the following zones are not valid: ",  paste0("'", names(zones.check)[!zones.check], "'", collapse = ", "), "."))
								}else{
									warning(paste0("In '", arena.name, "', the zone description for '", names(zones.check)[!zones.check], "' is not valid."))
								}
								check.pass = FALSE
							}
						}else{
							warning(paste0("In '", arena.name, "', the arena type '", arena.type, "' is not supported."))
							check.pass = FALSE
						}
					}
				}
			} # end project.dir check

			if(!file.exists(data.dir)){
				warning(paste0("The data directory '", data.dir, "'does not exist."))
				check.pass = FALSE
			}else{
				if("_TrackFileFormat" %in% colnames(experiment.data)){ # A missing column has already been reported.
					track.format.info = unique(as.character(experiment.data[["_TrackFileFormat"]]))
					supported.formats = suppressMessages(identify_track_format())
					# Only the part before the first underscore is compared with the supported formats.
					track.formats = vapply(track.format.info, function(info) strsplit(info, "_")[[1]][1], character(1))
					unsupported = track.format.info[!(track.formats %in% supported.formats)]
					if(length(unsupported) > 1){
						warning(paste0("The track formats ", paste(paste0("'", unsupported, "'"), collapse = ", "), " are not supported."))
						check.pass = FALSE
					}else if(length(unsupported) == 1){
						warning(paste0("The track format '", unsupported, "' is not supported."))
						check.pass = FALSE
					}
				}
				if("_TrackFile" %in% colnames(experiment.data)){ # A missing column has already been reported.
					track.files = as.character(stats::na.omit(unique(experiment.data[["_TrackFile"]])))
					track.paths = if(length(track.files) > 0) paste0(data.dir, track.files) else character(0)
					missing.tracks = track.paths[!file.exists(track.paths)]
					if(length(missing.tracks) > 1){
						warning(paste0("The following track files cannot be found: ", paste(paste0("'", missing.tracks, "'"), collapse = ", "), "."))
						check.pass = FALSE
					}else if(length(missing.tracks) == 1){
						warning(paste0("The track file ", paste(paste0("'", missing.tracks, "'"), collapse = ", "), "  cannot be found."))
						check.pass = FALSE
					}
				}
			} # end data.dir check
		} # end experiment.data check
	} # end format-specific checks

	if(check.pass){
		message(paste0(crayon::green("\u2714"), crayon::green(" This experiment appears to be valid and complete.")))
	}else{
		message(paste0(crayon::red("\u2716"), crayon::red(" This experiment has some problems. Please check the documentation at https://rupertoverall.net/Rtrack.")))
	}
	invisible(check.pass)
}
