##########################################################################
# VaRank 1.0                                                             #
#                                                                        #
# VaRank: a simple and powerful tool for ranking genetic variants        #
#                                                                        #
# Copyright (C) 2014 Veronique Geoffroy (veronique.geoffroy@inserm.fr)   # 
#                    Jean Muller (jeanmuller@unistra.fr)                 # 
#                                                                        #
# Please cite the following article:                                     #
#    XXX                                                                 #
#                                                                        #
# This is part of VaRank source code.                                    #
#                                                                        #
# This program is free software; you can redistribute it and/or          #
# modify it under the terms of the GNU General Public License            # 
# as published by the Free Software Foundation; either version 3         # 
# of the License, or (at your option) any later version.                 #
#                                                                        #
# This program is distributed in the hope that it will be useful,        # 
# but WITHOUT ANY WARRANTY; without even the implied warranty of         #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
# GNU General Public License for more details.                           #
#                                                                        #
# You should have received a copy of the GNU General Public License      #
# along with this program; If not, see <http://www.gnu.org/licenses/>.   #
##########################################################################

## Loading the different options in the following order:
## - Default options
## - Config file options (if file exists)
## - Options given in arguments
#
## Please note: option names are case-insensitive. Ex: "vcfdir" = "vcfDir"
##

proc VaRank_Version {} {

    # Return the VaRank version string.
    #
    # The version is cached in the global array entry g_VaRank(Version);
    # it is initialised to "1.0" the first time it is requested and any
    # value already stored there is returned unchanged.

    global g_VaRank

    if {![info exists g_VaRank(Version)]} {
        set g_VaRank(Version) "1.0"
    }

    return $g_VaRank(Version)
}

proc configureVaRank {argv} {

    # Build the run configuration in the global array g_VaRank.
    #
    # Options are loaded in increasing order of priority:
    #   1. built-in defaults,
    #   2. the "configfile" found in the vcf directory (if present),
    #   3. the command-line arguments.
    # Config file lines of the form "famN: p1 p2 ..." do not set an option;
    # they append the patient names to the global array g_lPatientsOf(famN).
    #
    # argv - command-line arguments as a flat list: -option value -option value ...
    #        Option names are case-insensitive; a leading "-" and any ":" are
    #        stripped from the NAME only (values are kept verbatim).
    #
    # Returns nothing. Prints a message and calls exit when -vcfDir is
    # missing, when no *.vcf file is found in it, when an option name is
    # unknown or when an option value fails validation.

    global g_VaRank
    global g_lPatientsOf
    global env

    puts "VaRank [VaRank_Version]"
    puts "VaRank is a program for Ranking genetic Variation from NGS data"
    puts ""
    puts "Copyright (C) 2014 GEOFFROY Veronique and MULLER Jean"
    puts ""
    puts "Please feel free to contact us for any suggestions or bug reports"
    puts "email: veronique.geoffroy@inserm.fr; jeanmuller@unistra.fr"
    puts ""

    puts "...downloading the configuration data ([clock format [clock seconds] -format "%B %d %Y - %H:%M"])"

    ## We must have a vcf input file
    ################################
    # Match the option name itself (optional leading "-", optional trailing
    # ":"), not any argument that merely contains "vcfdir" such as a path
    # given as the value of another option.
    set k [lsearch -regexp -nocase $argv {^-?vcfDir:?$}]
    if {$k != -1 && ($k + 1) < [llength $argv]} {
        set g_VaRank(vcfDir) [lindex $argv [expr {$k+1}]]
    } else {
        puts "VaRank needs in argument a study directory containing your vcf input file (-vcfDir ...) - Exit."
        exit
    }
    set liste_vcfFile [glob -nocomplain $g_VaRank(vcfDir)/*.vcf]
    if {$liste_vcfFile == ""} {
        puts "VaRank needs a VCF file in input. No VCF file present in $g_VaRank(vcfDir) - Exit."; exit
    }

    ## Load default options
    #######################
    puts "\t...configuration data by default"
    set g_VaRank(Homstatus) "no"
    set g_VaRank(Homcutoff) 80

    set g_VaRank(vcfInfo)  "no"
    set g_VaRank(rsFromVCF) "no"

    set g_VaRank(MEScutoff) -15
    set g_VaRank(SSFcutoff) -5
    set g_VaRank(NNScutoff) -10
    set g_VaRank(phastConsCutoff) 0.95
    set g_VaRank(readFilter)  10
    set g_VaRank(depthFilter) 10
    set g_VaRank(rsFilter)   removeNonPathoRS
    set g_VaRank(freqFilter) 0.01
    set g_VaRank(readPercentFilter) 15
    set g_VaRank(nowebsearch) "yes"
    set g_VaRank(metrics) "us"
    set g_VaRank(extann)  ""

    set g_VaRank(S_Known) 110
    set g_VaRank(S_Nonsense) 100
    set g_VaRank(S_Fs) 100
    set g_VaRank(S_EssentialSplice) 90
    set g_VaRank(S_StartLoss) 80
    set g_VaRank(S_StopLoss) 80
    set g_VaRank(S_Missense) 50
    set g_VaRank(S_CloseSplice) 70
    set g_VaRank(S_Inframe) 40
    set g_VaRank(S_Synonymous) 10
    set g_VaRank(S_DeepSplice) 25

    #set g_VaRank(uniprot)  ""
    set g_VaRank(uniprot)  "HUMAN.fasta.gz"
    set g_VaRank(refseq)   "human.protein.faa.gz"

    set g_VaRank(hgmdUser)   ""
    set g_VaRank(hgmdPasswd) ""

    ## Load config file options
    ###########################
    # Canonical spelling of every accepted option; user input is matched
    # against this list case-insensitively and then replaced by the
    # canonical name so that g_VaRank keys are always spelled the same way.
    set lOptionsOk "vcfDir Homstatus Homcutoff vcfInfo rsFromVCF MEScutoff SSFcutoff NNScutoff phastConsCutoff readFilter depthFilter rsFilter freqFilter readPercentFilter nowebsearch metrics extann S_Known S_Fs S_Nonsense S_EssentialSplice S_StartLoss S_StopLoss S_Missense S_CloseSplice S_Inframe S_Synonymous S_DeepSplice uniprot refseq hgmdUser hgmdPasswd"
    foreach configFile [glob -nocomplain $g_VaRank(vcfDir)/configfile] {
        puts "\t...configuration data from $configFile"
        foreach L [LinesFromFile $configFile] {
            if {[regexp "^#" $L]} {continue}

            # Lines that do not start with "-" are either family
            # definitions ("fam1: patientA patientB") or ignored.
            if {![regexp "^-" $L]} {
                if {[regexp "^(fam\[0-9\]+) *: *(.+)" $L match family lpat]} {
                    foreach val [split [string trim $lpat] " "] {
                        # Repeated spaces yield empty fields: not patients.
                        if {$val eq ""} {continue}
                        lappend g_lPatientsOf($family) $val
                    }
                }
                continue
            }

            # Drop the leading "-" and the ":" that separates the option
            # name from its value. Colons inside the value are preserved.
            regsub {^-} $L "" L
            regsub {^([^\s:]+)\s*:} $L {\1 } L
            set optionName  [lindex $L 0]
            set optionValue [lindex $L 1]

            # Exact (case-insensitive) match: a regexp match would accept
            # partial or empty names and silently set the wrong option.
            set k [lsearch -exact -nocase $lOptionsOk $optionName]
            if {$k != -1} {
                set optionName [lindex $lOptionsOk $k]
                set g_VaRank($optionName) $optionValue
            } else {
                puts "\"$optionName\" option not known."
                puts "For more information on the arguments, please use the -help option"
                exit
            }
        }
        break
    }

    ## Load options given in arguments
    ##################################
    puts "\t...configuration data given in arguments"
    # Walk the arguments as (name, value) pairs. A trailing name without a
    # value is ignored.
    set nbArgs [llength $argv]
    for {set i 0} {($i + 1) < $nbArgs} {incr i 2} {
        set optionName [lindex $argv $i]
        # Only the option name is cleaned up; the value is kept verbatim.
        regsub -all "^-|:" $optionName "" optionName
        set optionValue [lindex $argv [expr {$i + 1}]]

        set k [lsearch -exact -nocase $lOptionsOk $optionName]
        if {$k != -1} {
            set optionName [lindex $lOptionsOk $k]
            set g_VaRank($optionName) $optionValue
        } else {
            puts "\"$optionName\" option not known."
            puts "For more information on the arguments, please use the -help option"
            exit
        }
    }

    puts "\t...checking configuration data"
    ## It must be an integer value for the -readFilter -depthFilter -S_... options.
    foreach option "readFilter depthFilter S_Known S_Nonsense S_Fs S_EssentialSplice S_StartLoss S_StopLoss S_Missense S_CloseSplice S_Inframe S_Synonymous S_DeepSplice" {
        if {![regexp {^[0-9]+$} $g_VaRank($option)]} {
            puts "Bad value : -$option = $g_VaRank($option)"
            puts "Should be an integer. Exit"
            exit
        }
    }

    ## It must be an existing files
    foreach option "uniprot refseq" {
        if {![info exists g_VaRank(DB)] || ![file exists $g_VaRank(DB)]} {
            if {[info exists env(VARANK)]} {
                set expectedDB [file join $env(VARANK) Databases]
            } else {
                set expectedDB [file join "\$VARANK" Databases]
            }
            puts "No directory for database setup. This should be in $expectedDB."
        } else {
            set dbFile [file join $g_VaRank(DB) $g_VaRank($option)]
            if {![file exists $dbFile]} {
                # Report the offending path before the value is cleared.
                puts "Bad value for $option, file does not exists ($dbFile)."
                set g_VaRank($option) ""
            }
            # NOTE(review): the previous code assigned the joined path to a
            # throw-away local variable named after the file, so the option
            # has always kept the bare file name. That is preserved here;
            # confirm with the callers before storing the full path instead.
        }
    }

    ## It must be "yes" or "no"
    foreach option "Homstatus vcfInfo rsFromVCF nowebsearch" {
        # The alternation is grouped so that both anchors apply to both words.
        if {![regexp -nocase {^(yes|no)$} $g_VaRank($option)]} {
            puts "Bad value : -$option = $g_VaRank($option)"
            puts "Should be \"yes\" or \"no\""
            exit
        }
    }

    ## It must be an external annotation file
    if {$g_VaRank(extann) ne "" && ![file exists $g_VaRank(extann)]} {
        # Report the offending path before the value is cleared.
        puts "Bad value for extann, file does not exists $g_VaRank(extann)."
        set g_VaRank(extann) ""
    }

    ## It must be "removeNonPathoRS" or "none" for the -rsFilter option.
    if {![regexp -nocase {^(removeNonPathoRS|none)$} $g_VaRank(rsFilter)]} {
        puts "Bad value : -rsFilter = $g_VaRank(rsFilter)"
        puts "Should be \"removeNonPathoRS\" or \"none\""
        exit
    }

    ## It must be a value comprised between 0 and 100 for the -Homcutoff
    foreach option "Homcutoff readPercentFilter" {
        set value $g_VaRank($option)
        # Reject non-numeric input explicitly: expr would otherwise fall
        # back to a string comparison.
        if {![string is double -strict $value] || $value > 100 || $value < 0} {
            puts "Bad value : -$option = $value"
            puts "Should be an integer comprised into the range values: \[0,100\]"
            exit
        }
    }

    ## It must be a value comprised between -100 and 0 for the -MEScutoff -SSFcutoff -NNScutoff options.
    foreach option "MEScutoff SSFcutoff NNScutoff" {
        set value $g_VaRank($option)
        if {![string is double -strict $value] || $value < -100 || $value > 0} {
            puts "Bad value : -$option = $value"
            puts "Should be an integer comprised into the range values: \[-100,0\]"
            exit
        }
    }

    ## It must be a value comprised between 0 and 1 for the -phastConsCutoff and -freqFilter options.
    foreach option "phastConsCutoff freqFilter" {
        set value $g_VaRank($option)
        if {![string is double -strict $value] || $value < 0 || $value > 1} {
            puts "Bad value : -$option = $value"
            puts "Should be a value comprised into the range values: \[0,1\]"
            exit
        }
    }

    ## It must be "fr" or "us" for the metrics option.
    if {![regexp -nocase {^(fr|us)$} $g_VaRank(metrics)]} {
        puts "Bad value : -metrics = $g_VaRank(metrics)"
        puts "Should be \"fr\" or \"us\""
        exit
    }

    puts "\t******************************************"
    puts "\tVaRank has been run with these arguments :"
    puts "\t******************************************"
    foreach key [lsort [array names g_VaRank]] {
        puts "\t-$key $g_VaRank($key)"
    }
    puts "\t******************************************"

    return
}
