## This is the primary ILSA (Inverted Library Search Algorithm)
## It receives as input:
## - up to 3 query mass spectra (30, 60, 90) [requiring 30] as raw files (e.g., .txt, .jsp, .csv)
## - a reference library (as R data table)
## - search mode (default: HiRes, options are HiRes or LowRes)
## - Only Matches (default: False, options are True or False)
## - target intensity threshold (value between 0 and 1)
## - Noise intensity threshold (value between 0 and 1)
## - m/z tolerance (value between 0 and 0.1)
##
## Output: an R list holding the queries, the targets, and the search results
## (one data frame per target). The results can then be displayed in DIT, saved
## as Excel sheets (batch searching), or stored in some other data structure
## for archiving and mining.
## =============================================================================
## LOADING EXTERNAL PACKAGES IF NOT ALREADY LOADED
# data.table provides the reference-library table type used by the search.
# requireNamespace() is a cheap single-package availability check; the package
# is installed only when that check fails.
if (!requireNamespace("data.table", quietly = TRUE)) {
  install.packages("data.table")
}
# library() does nothing when the package is already attached, so no separate
# "is it attached?" test is required.
library(data.table)

#' Inverted Library Search Algorithm (ILSA)
#'
#' Extracts target peaks from the first query spectrum, looks each target up in
#' the reference library (as protonated molecule, base peak, major fragment, or
#' the +1 isotope of the protonated molecule / base peak) and scores every
#' candidate library entry against the available query spectra.
#'
#' @param Query1 First query spectrum; handed to `asm_bpNormalizer()` and
#'   compared against reference spectra labelled "+30 V". Required.
#' @param Query2 Optional second query spectrum (compared against "+60 V"
#'   reference spectra). Treated as absent when its length is 0.
#' @param Query3 Optional third query spectrum (compared against "+90 V"
#'   reference spectra). Treated as absent when its length is 0.
#' @param RefLibrary Reference library as a data.table. The columns read here
#'   are `PrecursorMZ_gen`, `pmMajIsoMZ30V`, `theoBP`, `bpMajIsoMZ30V`,
#'   `mfMZ30V`, `pmMajIsoAb30V`, `Name`, `Class`, `NumSpectra`, `Energies` and
#'   `PeakLists`.
#' @param SearchMode `"HiRes"` (default) or `"LowRes"`. In `"LowRes"` mode the
#'   library and target m/z values are rounded to integers before matching.
#'   Any value other than `"HiRes"` forces the m/z tolerance to 1e-8.
#' @param OnlyMatches If `TRUE`, targets without any library candidate are
#'   skipped instead of producing an "NA" row. Default `FALSE`.
#' @param TargetIntensityThreshold Passed to `asm_targetMolecules()`.
#' @param NoiseIntensityThreshold Passed to `asm_bpNormalizer()`.
#' @param mzTol m/z tolerance used in `"HiRes"` mode; defaults to 0.005 when
#'   `NULL` / empty.
#'
#' @return A list with elements `Q1`, `Q2`, `Q3` (the queries as supplied),
#'   `TARGETS` (output of `asm_targetMolecules()`) and `SearchResults` (a list
#'   indexed by target holding one data frame of results per target, or `NULL`
#'   when nothing was stored).
asm_ILSA <- function(Query1,
                     Query2 = NULL,
                     Query3 = NULL,
                     RefLibrary,
                     SearchMode = "HiRes",
                     OnlyMatches = FALSE,
                     TargetIntensityThreshold,
                     NoiseIntensityThreshold,
                     mzTol = NULL) {

  ## ---- Tolerances -----------------------------------------------------------
  if (SearchMode == "HiRes") {
    if (length(mzTol) == 0) {
      mzTol <- 0.005 # default value if m/z tolerance is not specified by user
    }
  } else {
    mzTol <- 1e-8 # integer m/z values are compared, so only exact hits count
  }
  epsilon_0 <- mzTol         # m/z difference used to identify targets
  epsilon_1 <- 2 * epsilon_0 # m/z difference used when computing scores

  ## ---- Local helpers --------------------------------------------------------
  # Extract one library column (by index) as a numeric vector.
  lib_numeric <- function(col_idx) {
    as.numeric(RefLibrary[, col_idx, with = FALSE][[1]])
  }

  # For each reference m/z: is at least one already-visited target within
  # epsilon_0 of it? NA comparisons count as "no".
  near_any_target <- function(ref_mz, seen_targets) {
    vapply(
      ref_mz,
      function(mz) any(abs(seen_targets - mz) <= epsilon_0, na.rm = TRUE),
      logical(1)
    )
  }

  # Placeholder result used when a target has no (real) match.
  no_match_row <- function() {
    as.data.frame(cbind(
      Compound = "NA", Class = "NA", deltaMZ = "NA", REFMZ = "NA",
      FPIE = "NA", FPIE1 = "NA", FPIE2 = "NA", FPIE3 = "NA",
      RevMF = "NA", RevMF1 = "NA", RevMF2 = "NA", RevMF3 = "NA",
      MDSpread = "NA", MDSpread1 = "NA", MDSpread2 = "NA", MDSpread3 = "NA",
      IRDiff_30V = "NA", MatchType = "NA"
    ))
  }

  ## ---- Reference library columns -------------------------------------------
  cats <- colnames(RefLibrary)
  i_PM <- which(cats == "PrecursorMZ_gen")
  i_PMi <- which(cats == "pmMajIsoMZ30V")
  i_BP <- which(cats == "theoBP")
  i_BPi <- which(cats == "bpMajIsoMZ30V")
  i_MF1 <- which(cats == "mfMZ30V")
  i_PMi_ab <- which(cats == "pmMajIsoAb30V")

  sPM <- lib_numeric(i_PM)   # all protonated molecules in the library
  sPMi <- lib_numeric(i_PMi) # +1 isotopes of the protonated molecules
  sBP <- lib_numeric(i_BP)   # all explainable base peaks in the library
  sBPi <- lib_numeric(i_BPi) # +1 isotopes of the explainable base peaks
  sMF1 <- lib_numeric(i_MF1) # all explainable major fragment ions

  if (SearchMode == "LowRes") {
    sPM <- round(sPM, 0)
    sPMi <- round(sPMi, 0)
    sBP <- round(sBP, 0)
    sBPi <- round(sBPi, 0)
    sMF1 <- round(sMF1, 0)
  }

  ## ---- Query spectra --------------------------------------------------------
  # Targets are always taken from the 4-decimal normalisation so that the
  # displayed m/z values stay unrounded, even in LowRes mode.
  bpN_Query1 <- asm_bpNormalizer(Query1, SearchMode, precision = 4, NoiseIntensityThreshold)
  targets_disp <- asm_targetMolecules(bpN_Query1, TargetIntensityThreshold)
  targets_disp_mz <- targets_disp$mz
  targets_mz <- targets_disp_mz

  if (SearchMode == "LowRes") {
    bpN_Query1 <- asm_bpNormalizer(Query1, SearchMode, precision = 0, NoiseIntensityThreshold)
    targets_mz <- round(targets_disp$mz, 0)
  }

  # QueryScenario[k] is 1 when the k-th query spectrum is available.
  QueryScenario <- c(1, 0, 0)
  if (length(Query2) > 0) {
    bpN_Query2 <- asm_bpNormalizer(Query2, SearchMode, precision = 0, NoiseIntensityThreshold)
    QueryScenario[2] <- 1
  }
  if (length(Query3) > 0) {
    bpN_Query3 <- asm_bpNormalizer(Query3, SearchMode, precision = 0, NoiseIntensityThreshold)
    QueryScenario[3] <- 1
  }
  # Equal weight for every available query spectrum.
  weights <- (1 / sum(QueryScenario)) * QueryScenario

  run_length <- length(targets_mz)
  FullResults.list <- NULL

  ## ---- Search ---------------------------------------------------------------
  stime <- system.time({
    # seq_len() keeps the loop from running at all when there are no targets.
    for (i in seq_len(run_length)) {

      cat(paste0("Target m/z: ", targets_disp_mz[i], "\n"))

      target_mz <- as.numeric(targets_mz[i])
      # Plausibility checks below only look at targets up to and including i.
      seen_targets <- targets_mz[seq_len(i)]

      # Library rows whose ion lies within epsilon_0 of the target.
      sPMm <- which(abs(sPM - target_mz) <= epsilon_0)   # protonated molecules
      sBPm <- which(abs(sBP - target_mz) <= epsilon_0)   # base peaks
      sMF1m <- which(abs(sMF1 - target_mz) <= epsilon_0) # major fragments
      sPMim <- which(abs(sPMi - target_mz) <= epsilon_0) # +1 isotopes (PM)
      sBPim <- which(abs(sBPi - target_mz) <= epsilon_0) # +1 isotopes (BP)

      # A major-fragment hit is only kept when the row's protonated molecule or
      # base peak is itself among the visited targets.
      sMF1m <- sMF1m[near_any_target(sPM[sMF1m], seen_targets) |
                       near_any_target(sBP[sMF1m], seen_targets)]
      # A +1 isotope (PM) hit requires the protonated molecule among the targets.
      sPMim <- sPMim[near_any_target(sPM[sPMim], seen_targets)]
      # A +1 isotope (BP) hit requires the base peak among the targets.
      sBPim <- sBPim[near_any_target(sBP[sBPim], seen_targets)]

      sTotal <- unique(c(sPMm, sBPm, sMF1m, sPMim, sBPim))
      n_hits <- length(sTotal)

      # OUTPUTS (one slot per candidate library row)
      Compound <- character(n_hits)
      Class <- character(n_hits)
      REFMZ <- numeric(n_hits)
      deltaMZ <- numeric(n_hits)
      FPIE <- numeric(n_hits)
      FPIE1 <- numeric(n_hits)
      FPIE2 <- numeric(n_hits)
      FPIE3 <- numeric(n_hits)
      RevMF <- numeric(n_hits)
      RevMF1 <- numeric(n_hits)
      RevMF2 <- numeric(n_hits)
      RevMF3 <- numeric(n_hits)
      MDSpread <- numeric(n_hits) # mass difference spread
      MDSpread1 <- numeric(n_hits)
      MDSpread2 <- numeric(n_hits)
      MDSpread3 <- numeric(n_hits)
      IRDiff_30V <- numeric(n_hits) # isotope ratio difference
      MatchType <- character(n_hits)

      if (n_hits == 0) {
        if (OnlyMatches == TRUE) {
          next
        }
        results <- no_match_row()
        cat("No matches in database.\n")
      } else {
        for (j in seq_len(n_hits)) {
          lib_row <- sTotal[j]

          # Label the hit by the first matching ion type, in priority order.
          # lib_row always belongs to at least one of the five sets because
          # sTotal is their union.
          if (lib_row %in% sPMm) {
            MatchType[j] <- "Protonated Molecule"
            ref_col <- i_PM
          } else if (lib_row %in% sBPm) {
            MatchType[j] <- "Base Peak"
            ref_col <- i_BP
          } else if (lib_row %in% sPMim) {
            MatchType[j] <- "+1 Isotope (PM)"
            ref_col <- i_PMi
          } else if (lib_row %in% sBPim) {
            MatchType[j] <- "+1 Isotope (BP)"
            ref_col <- i_BPi
          } else {
            MatchType[j] <- "Major Fragment Ion"
            ref_col <- i_MF1
          }
          # Read from the library itself so the reference m/z is unrounded even
          # in LowRes mode.
          ref_mz <- as.numeric(RefLibrary[lib_row, ref_col, with = FALSE][[1]])

          NumRefSpec <- as.numeric(RefLibrary[lib_row, NumSpectra][[1]])
          CEs <- RefLibrary[lib_row, Energies][[1]]

          # One row per reference spectrum; the three columns are read below as
          # FPIE, reverse match factor and mass difference spread.
          asm_ILSA_Scores <- array(0, dim = c(NumRefSpec, 3))

          # Reference peaks above this m/z are ignored: protonated molecule plus
          # 5 + epsilon_0 Da of head-room for isotope peaks.
          prot_mol <- as.numeric(RefLibrary[lib_row, i_PM, with = FALSE][[1]])
          max_mz_consider <- prot_mol + 5 + epsilon_0

          # Stays NA only when the library row lists no reference spectra.
          isotopeDiff <- NA_real_

          for (m in seq_len(NumRefSpec)) {
            # NOTE(review): availability is checked by position m while the
            # query is chosen by energy label - this assumes reference spectra
            # are stored in +30/+60/+90 V order; confirm against the library.
            if (QueryScenario[m] == 0) next # that query spectrum is unavailable

            iCE <- CEs[m]
            if (iCE == "+30 V") {
              Query <- bpN_Query1
            } else if (iCE == "+60 V") {
              Query <- bpN_Query2
            } else if (iCE == "+90 V") {
              Query <- bpN_Query3
            }

            # Collect reference peaks up to max_mz_consider to avoid
            # computations on noise.
            RefPeakList <- asm_spec2dt_ref(RefLibrary[lib_row, PeakLists][[1]][m], max_mz_consider)
            RefPeakList <- asm_bpNormalizer(RefPeakList, SearchMode, 0, NoiseIntensityThreshold)

            asm_ILSA_Scores[m, ] <- asm_ILSA_Scoring(RefPeakList, Query, epsilon_1)

            # The isotope ratio difference is only evaluated for the first
            # reference spectrum.
            if (m == 1) {
              isotopeDiff <- asm_isotopeDiffCalculator(
                sPM[lib_row], sPMi[lib_row],
                RefLibrary[lib_row, i_PMi_ab, with = FALSE],
                Query, epsilon_0
              )
            }
          }

          asm_PEs <- round(asm_ILSA_Scores[, 1], 4)
          asm_MFs <- round(asm_ILSA_Scores[, 2], 4)
          asm_MDs <- round(asm_ILSA_Scores[, 3], 4)

          weights1 <- weights # should be tunable: fraction of reference abundance explained
          weights2 <- weights # should be tunable: reverse match factor
          weights3 <- weights # should be tunable: mass difference spread

          # Weighted averages over the reference spectra.
          aPE <- 0 # fraction of reference abundance explained
          aMF <- 0 # reverse match factor
          aMD <- 0 # mass difference spread
          for (m in seq_len(NumRefSpec)) {
            aPE <- aPE + weights1[m] * asm_PEs[m]
            aMF <- aMF + weights2[m] * asm_MFs[m]
            aMD <- aMD + weights3[m] * asm_MDs[m]
          }

          # Assign "scores" for each match
          deltaMZ[j] <- round((targets_disp_mz[i] - ref_mz), 4)
          FPIE[j] <- round(aPE, 3)
          RevMF[j] <- round(aMF, 3)
          MDSpread[j] <- round(aMD, 3)
          IRDiff_30V[j] <- round(isotopeDiff, 3)

          # Per-spectrum values; NA when the library row has fewer than three
          # reference spectra.
          FPIE1[j] <- asm_PEs[1]
          FPIE2[j] <- asm_PEs[2]
          FPIE3[j] <- asm_PEs[3]

          RevMF1[j] <- asm_MFs[1]
          RevMF2[j] <- asm_MFs[2]
          RevMF3[j] <- asm_MFs[3]

          MDSpread1[j] <- asm_MDs[1]
          MDSpread2[j] <- asm_MDs[2]
          MDSpread3[j] <- asm_MDs[3]

          # Match information from library
          Compound[j] <- RefLibrary[lib_row, Name][[1]]
          Class[j] <- RefLibrary[lib_row, Class][[1]]
          REFMZ[j] <- round(ref_mz, 4)

          # for console
          cat(paste0(Compound[j], ": ", "FPIE-based Index: ", FPIE[j], ";   RevMF-based Index: ", RevMF[j], "; ", MatchType[j], "\n"))
        }

        # Candidates whose averaged FPIE is zero (or NA) are not real matches.
        RealMatches <- which(as.numeric(FPIE) != 0)
        if (length(RealMatches) > 0) {
          # cbind() of mixed types yields a character matrix, so every column
          # of the resulting data frame holds text.
          all_hits <- cbind(Compound, Class, deltaMZ, REFMZ,
                            FPIE, FPIE1, FPIE2, FPIE3,
                            RevMF, RevMF1, RevMF2, RevMF3,
                            MDSpread, MDSpread1, MDSpread2, MDSpread3,
                            IRDiff_30V, MatchType)
          results <- as.data.frame(all_hits[RealMatches, , drop = FALSE])
        } else {
          # NOTE(review): unlike the no-candidate case, OnlyMatches is not
          # consulted here - confirm whether that is intended.
          results <- no_match_row()
          cat("No matches in database.\n")
        }
      }

      FullResults.list[[i]] <- results

    } ## end i loop
  })[3]

  cat(paste0("ILSA of query completed in ", round(stime, 3), " seconds."))

  list(Q1 = Query1, Q2 = Query2, Q3 = Query3, TARGETS = targets_disp, SearchResults = FullResults.list)
}




