#' evaluate RAMSearch, MSFinder mssearch, MSFinder structure, MSFinder formula, and findmain output to annotate spectra of a ramclustR object
#'
#' After running RAMSearch (msp) and MSFinder on .mat or .msp files, import the spectral search results
#' @param ramclustObj R object - the ramclustR object which was used to write the .mat or .msp files
#' @param msfinder.dir full path to MSFinder directory - used for naming refinement
#' @param standardize.names logical: if TRUE, use inchikey for standardized chemical name lookup (http://cts.fiehnlab.ucdavis.edu/)
#' @param min.msms.score numerical: what is the minimum MSFinder similarity score acceptable.  default = 3.5
#' @details this function imports the output from the MSFinder program to annotate the ramclustR object
#' @return an updated ramclustR object, with the RC$ann and RC$annconf slots updated with annotations based on output from 1. ramsearch output, 2. msfinder mssearch, 3. msfinder predicted structure, 4. msfinder predicted formula, and 5. interpretMSSpectrum inferred molecular weight, with listed order as priority.  
#' @references Broeckling CD, Afsar FA, Neumann S, Ben-Hur A, Prenni JE. RAMClust: a novel feature clustering method enables spectral-matching-based annotation for metabolomics data. Anal Chem. 2014 Jul 15;86(14):6812-7. doi: 10.1021/ac501530d.  Epub 2014 Jun 26. PubMed PMID: 24927477.
#' @references Broeckling CD, Ganna A, Layer M, Brown K, Sutton B, Ingelsson E, Peers G, Prenni JE. Enabling Efficient and Confident Annotation of LC-MS Metabolomics Data through MS1 Spectrum and Time Prediction. Anal Chem. 2016 Sep 20;88(18):9226-34. doi: 10.1021/acs.analchem.6b02479. Epub 2016 Sep 8. PubMed PMID: 7560453.
#' @references Tsugawa H, Kind T, Nakabayashi R, Yukihira D, Tanaka W, Cajka T, Saito K, Fiehn O, Arita M. Hydrogen Rearrangement Rules: Computational MS/MS Fragmentation and Structure Elucidation Using MS-FINDER Software. Anal Chem. 2016 Aug 16;88(16):7946-58. doi: 10.1021/acs.analchem.6b00770. Epub 2016 Aug 4. PubMed PMID: 27419259.
#' @references http://cts.fiehnlab.ucdavis.edu/static/download/CTS2-MS2015.pdf 
#' @keywords 'ramclustR' 'RAMClustR', 'ramclustR', 'metabolomics', 'mass spectrometry', 'clustering', 'feature', 'xcms', 'MSFinder'
#' @author Corey Broeckling
#' @export 


annotate <- function(ramclustObj = NULL,
                     msfinder.dir = "C:/MSFinder/MS-FINDER program ver. 2.40",
                     standardize.names = FALSE,
                     min.msms.score = 3.5
) {
  # Assign compound annotations to a ramclustR object from whichever result
  # slots are present on it.  Sources are applied in priority order
  # (MSFinder spectrum search, MSFinder structure, MSFinder formula, findmain
  # M); a compound is only touched while its annotation still equals its
  # compound name, so a higher priority source is never overwritten.
  #
  # ramclustObj        list-like ramclustR object; must carry $cmpd and $ann.
  # msfinder.dir       MSFinder install directory; its 'Resources' folder is
  #                    searched for the structure DB (.esd) used to refine
  #                    structure names.
  # standardize.names  if TRUE, look up InChI codes for annotated inchikeys
  #                    through the CTS web service (needs internet access).
  # min.msms.score     minimum 'totalscore' for a spectrum search hit.
  #
  # Returns the updated ramclustObj (ann, annconf, inchikey, inchi, smiles,
  # dbid, synonyms and history slots).

  if (is.null(ramclustObj)) {
    stop("please provide a ramclustR object as 'ramclustObj'", call. = FALSE)
  }

  if (!dir.exists(msfinder.dir)) {
    stop("msfinder directory does not exist: please set 'msfinder.dir' option as your full msfinder directory path")
  }

  use.short.inchikey <- TRUE

  # Which result slots are available decides which annotation sources run.
  obj.names     <- names(ramclustObj)
  has.findmain  <- "M" %in% obj.names
  has.formula   <- "msfinder.formula" %in% obj.names
  has.structure <- "msfinder.structure" %in% obj.names
  has.mssearch  <- "msfinder.mssearch.details" %in% obj.names

  # TRUE while compound i still carries its default name.  isTRUE() keeps an
  # NA annotation from aborting the 'if' tests below.
  is.unannotated <- function(i) {
    isTRUE(ramclustObj$cmpd[i] == ramclustObj$ann[i])
  }

  # Newest structure DB file in <msfinder.dir>/Resources for a file name
  # prefix; character(0) when there is none.
  resource.dir <- paste0(msfinder.dir, "/Resources")
  latest.db <- function(prefix) {
    hits <- list.files(resource.dir, pattern = prefix)
    if (length(hits) > 1) {
      version <- suppressWarnings(as.numeric(
        gsub(".esd", "", gsub(prefix, "", hits, fixed = TRUE), fixed = TRUE)
      ))
      hits <- if (any(!is.na(version))) hits[which.max(version)] else hits[1]
    }
    hits
  }

  # The structure DB is only needed to refine structure names, so it is only
  # read when structure results exist; a missing DB downgrades to a warning
  # instead of failing inside read.delim2.
  d <- NULL
  if (has.structure) {
    sfile <- latest.db("ExistStructureDB_vs")
    if (length(sfile) == 0) {
      sfile <- latest.db("MsfinderStructureDB-VS")
    }
    if (length(sfile) == 1) {
      d <- read.delim2(paste0(resource.dir, "/", sfile), header = TRUE,
                       na.strings = "N/A", quote = "", stringsAsFactors = FALSE)
      if (!("Title" %in% names(d))) {
        warning("structure DB has no 'Title' column: MSFinder names used as is",
                call. = FALSE)
        d <- NULL
      }
    } else {
      warning("no structure DB file found in msfinder.dir / Resources: ",
              "MSFinder names used as is", call. = FALSE)
    }
  }

  # Make sure the identifier slots exist, one entry per compound.
  n.cmpd <- length(ramclustObj$cmpd)
  for (slot in c("inchikey", "inchi", "smiles", "dbid")) {
    if (!(slot %in% obj.names)) {
      ramclustObj[[slot]] <- rep(NA, n.cmpd)
    }
  }
  if (!("synonyms" %in% obj.names)) {
    ramclustObj$synonyms <- as.list(rep(NA, n.cmpd))
  }

  ## 1. MSFinder spectrum search
  if (has.mssearch) {
    for (i in seq_along(ramclustObj$ann)) {
      entry <- ramclustObj$msfinder.mssearch.details[[i]]
      if (!is.list(entry)) next
      hit <- entry$summary
      if (!is.data.frame(hit) || nrow(hit) == 0 || !is.unannotated(i)) next
      if (!isTRUE(hit[1, "totalscore"] >= min.msms.score)) next

      ramclustObj$inchikey[i] <- as.character(hit[1, "inchikey"])
      ramclustObj$smiles[i]   <- as.character(hit[1, "smiles"])
      ramclustObj$ann[i]      <- as.character(hit[1, "name"])
      ramclustObj$annconf[i]  <- 2
      ramclustObj$dbid[i]     <- as.character(hit[1, "resources"])
    }
  }

  ## 2. MSFinder predicted structure
  if (has.structure) {

    # Locate the full and (optional) short inchikey columns of the DB.
    inchikey.col <- integer(0)
    inchikey.short.col <- integer(0)
    if (!is.null(d)) {
      inchikey.col <- grep("inchikey", names(d), ignore.case = TRUE)
      if (length(inchikey.col) > 2) {
        stop("too many inchikey columns in MSFinder table - please report error to ", "maintainer('RAMClustR')",  '\n')
      }
      if (length(inchikey.col) == 2) {
        is.short <- grepl("short", names(d)[inchikey.col], ignore.case = TRUE)
        inchikey.short.col <- inchikey.col[is.short]
        inchikey.col <- inchikey.col[!is.short]
      }
    }
    use.short <- use.short.inchikey && length(inchikey.short.col) == 1

    for (i in seq_along(ramclustObj$ann)) {
      top <- ramclustObj$msfinder.structure[[i]]
      if (!is.data.frame(top) || !is.unannotated(i)) next

      full.key <- as.character(top[1, "inchikey"])

      # Rows of the structure DB sharing this inchikey.  fixed = TRUE: the key
      # is a literal string, not a regular expression; an NA/empty key simply
      # matches nothing.
      drow <- integer(0)
      if (!is.null(d) && !is.na(full.key) && nzchar(full.key)) {
        if (use.short) {
          short.key <- strsplit(full.key, "-", fixed = TRUE)[[1]][1]
          drow <- grep(short.key, d[, inchikey.short.col], fixed = TRUE)
        } else if (length(inchikey.col) == 1) {
          drow <- grep(full.key, d[, inchikey.col], fixed = TRUE)
        }
      }

      ramclustObj$inchikey[i] <- full.key
      ramclustObj$ann[i]      <- as.character(top[1, "name"])
      ramclustObj$smiles[i]   <- as.character(top[1, "smiles"])
      ramclustObj$annconf[i]  <- 2
      ramclustObj$dbid[i]     <- as.character(top[1, "resources"])

      # Prefer the DB title (the shortest one when several rows match); keep
      # the MSFinder name when the DB offers no usable title.
      if (length(drow) > 0) {
        titles <- d[drow, "Title"]
        titles <- titles[!is.na(titles) & nzchar(titles)]
        if (length(titles) > 0) {
          ramclustObj$ann[i] <- titles[which.min(nchar(titles))]
        }
      }
    }
  }

  ## 3. MSFinder predicted formula
  if (has.formula) {
    for (i in seq_along(ramclustObj$ann)) {
      # '[[' works for both list and atomic storage, so ann stays a character
      # vector rather than being coerced to a list.
      fml <- ramclustObj$msfinder.formula[[i]]
      if (length(fml) == 0 || is.na(fml[1]) || !is.unannotated(i)) next

      ramclustObj$ann[i] <- as.character(fml[1])
      ramclustObj$annconf[i] <- 3
      details <- ramclustObj$msfinder.formula.details[[i]]
      if (is.data.frame(details) && "resourcenames" %in% names(details)) {
        ramclustObj$dbid[i] <- details[1, "resourcenames"]
      }
    }
  }

  ## 4. findmain inferred molecular weight
  if (has.findmain) {
    for (i in seq_along(ramclustObj$ann)) {
      if (!is.na(ramclustObj$M[i]) && is.unannotated(i)) {
        ramclustObj$ann[i] <- paste("M =", ramclustObj$M[i])
      }
    }
  }

  ramclustObj$inchikey[which(ramclustObj$inchikey == "undefined")] <- NA

  if (standardize.names) {
    cat("using chemical translation service - requires interet access and may take a few minutes to complete", '\n')

    inchikey2inchi <- which(!is.na(ramclustObj$inchikey) & is.na(ramclustObj$inchi))
    for (i in inchikey2inchi) {
      # The space in 'InChI Code' must be percent-encoded to form a valid URL.
      link <- paste0("http://cts.fiehnlab.ucdavis.edu/rest/convert/InChIKey/InChI%20Code/",
                     ramclustObj$inchikey[i])

      # Retry for up to 5 seconds, pausing between attempts.
      out <- NA
      start <- Sys.time()
      repeat {
        out <- tryCatch(suppressWarnings(readLines(link)),
                        error = function(e) NA)
        if (!is.na(out[1])) break
        if (as.numeric(difftime(Sys.time(), start, units = "secs")) > 5) break
        Sys.sleep(0.25)
      }

      # No response: leave the InChI missing rather than parsing NA as JSON.
      if (is.na(out[1])) {
        ramclustObj$inchi[i] <- NA
        next
      }

      inchis <- tryCatch(
        unlist(fromJSON(out)$result),
        error = function(e) {
          warning("could not parse CTS response for ", ramclustObj$inchikey[i],
                  ": ", conditionMessage(e), call. = FALSE)
          NULL
        }
      )
      ramclustObj$inchi[i] <- if (length(inchis) >= 1) inchis[1] else NA
    }

    ## rJava required for rcdk/rinchi.  causes too many headaches.
    ## try to find a lookup alternative for inchi to smiles
  }

  ## modify compound names to make them unique
  # Missing inchikeys are temporarily stored as the string "NA" so that they
  # compare equal to each other in unique().
  ramclustObj$inchikey[which(is.na(ramclustObj$inchikey))] <- "NA"
  nt <- table(ramclustObj$ann)
  while (any(nt > 1)) {
    dup.name <- names(nt)[which(nt > 1)[1]]
    mtch <- which(ramclustObj$ann == dup.name)
    if (length(unique(ramclustObj$inchikey[mtch])) == 1) {
      # same compound annotated several times: number the copies
      ramclustObj$ann[mtch] <- paste(ramclustObj$ann[mtch], seq_along(mtch), sep = "__")
    } else {
      # different compounds sharing a name: use the next synonym when one is
      # available, otherwise number the copy.  Every copy is renamed, so each
      # pass of the while loop makes progress.
      for (j in seq_along(mtch)) {
        syn <- ramclustObj$synonyms[[mtch[j]]]
        cur <- which(syn == dup.name)
        replacement <- NA
        if (length(cur) > 0 && length(syn) > cur[1]) {
          replacement <- syn[cur[1] + 1]
        }
        if (is.na(replacement) || identical(as.character(replacement), dup.name)) {
          replacement <- paste(ramclustObj$ann[mtch[j]], j, sep = "__")
        }
        ramclustObj$ann[mtch[j]] <- replacement
      }
    }
    nt <- table(ramclustObj$ann)
  }

  ramclustObj$inchikey[which(ramclustObj$inchikey == "NA")] <- NA

  # Record the sources that were actually available to this call.
  ramclustObj$history <- paste(ramclustObj$history,
                               " Annotations were assigned using the RAMClustR annotate function.",
                               " Annotation priority was assigned from higest priority to lowest:",
                               if (any(names(ramclustObj) == "rs.lib")) {" RAMsearch, "},
                               if (has.mssearch) {" MSFinder spectrum search, "},
                               if (has.structure) {" MSFinder structure, "},
                               if (has.formula) {" MSFinder formula, "},
                               if (has.findmain) {" interpretMSSpectrum M."},
                               sep = ""
  )

  return(ramclustObj)
}

