##########################################################################
# VaRank 1.0                                                             #
#                                                                        #
# VaRank: a simple and powerful tool for ranking genetic variants        #
#                                                                        #
# Copyright (C) 2014 Veronique Geoffroy (veronique.geoffroy@inserm.fr)   # 
#                    Jean Muller (jeanmuller@unistra.fr)                 # 
#                                                                        #
# Please cite the following article:                                     #
#    XXX                                                                 #
#                                                                        #
# This is part of VaRank source code.                                    #
#                                                                        #
# This program is free software; you can redistribute it and/or          #
# modify it under the terms of the GNU General Public License            # 
# as published by the Free Software Foundation; either version 3         # 
# of the License, or (at your option) any later version.                 #
#                                                                        #
# This program is distributed in the hope that it will be useful,        # 
# but WITHOUT ANY WARRANTY; without even the implied warranty of         #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
# GNU General Public License for more details.                           #
#                                                                        #
# You should have received a copy of the GNU General Public License      #
# along with this program; If not, see <http://www.gnu.org/licenses/>.   #
##########################################################################


proc ExternalAnnotations args {

    # Lazy loader and accessor for user-supplied, gene-centric annotations.
    #
    # File format: tab separated values, 1st non-empty/non-comment line is a
    # header, 1st column is the gene name, the remaining columns are free.
    # Typical use would be a gene file containing specific annotations such as
    # transmission mode, disease, expression...
    #
    # Call forms used in this file:
    #   ExternalAnnotations               load the file given by g_VaRank(extann)
    #   ExternalAnnotations L_Files       list of annotation files loaded so far
    #   ExternalAnnotations $file Header  header line of $file ("" when the 1st
    #                                     header column does not contain "gene")
    #   ExternalAnnotations $file L_ID    identifiers (1st column) read from $file
    #   ExternalAnnotations $file $id     annotation columns of $id (tab separated)
    #
    # Returns the cached value for the requested key, or "" when nothing is
    # known (no annotation file configured, unknown key, or plain load call).
    #
    # Everything is cached in the global array g_ExtAnnotation, keyed by the
    # arguments joined with ",".
    #
    # 14/03/2014

    global g_VaRank
    global g_ExtAnnotation

    # Fast path: the key, exactly as given, is already cached.
    set What [join $args ","]
    if {[info exists g_ExtAnnotation($What)]} {return $g_ExtAnnotation($What)}

    # Without a configured annotation file there is nothing to look up.
    if {![info exists g_VaRank(extann)] || $g_VaRank(extann) == ""} {return ""}
    set File_Anno $g_VaRank(extann)

    if {[file exists [lindex $args 0]]} {
	# 1st argument is itself a file: it overrides the configured one.
	set File_Anno [lindex $args 0]
    } else {
	# Otherwise the key is relative to the configured file.
	set What [join [concat [list $File_Anno] $args] ","]
    }

    if {[info exists g_ExtAnnotation($What)]} {return $g_ExtAnnotation($What)}

    # The file was already parsed and the key is not in it.
    if {[info exists g_ExtAnnotation($File_Anno,Loaded)]} {return ""}

    lappend g_ExtAnnotation(L_Files) $File_Anno
    set g_ExtAnnotation($File_Anno,Loaded) 1

    # Always define these two entries so that an empty or header-only file
    # does not break the summary below nor later "Header"/"L_ID" lookups.
    set g_ExtAnnotation($File_Anno,Header) ""
    set g_ExtAnnotation($File_Anno,L_ID)   {}

    puts "...Loading [file tail $File_Anno]  ([clock format [clock seconds] -format "%B %d %Y - %H:%M"])"

    set First 1
    set F [open "$File_Anno"]
    while {[gets $F Line]>=0} {
	# Skip lines without any alphanumeric content and comment lines.
	if {! [regexp -nocase {[a-z0-9]+} $Line] || [regexp -nocase {^[\#]} $Line]} {continue}

	if {$First} {
	    # The first useful line is the header.
	    set First 0
	    if {![regexp -nocase "gene" [lindex [split $Line "\t"] 0]]} {
		puts "Reading $File_Anno, header is present but does not contain the gene column used to identify"
	    } else {
		set g_ExtAnnotation($File_Anno,Header) $Line
	    }
	    continue
	}

	set SLine [split $Line "\t"]
	set ID    [string trim [lindex $SLine 0]]

	if {![info exists g_ExtAnnotation($File_Anno,$ID)]} {
	    lappend g_ExtAnnotation($File_Anno,L_ID) $ID
	} else {
	    puts "Reading Annotation file [file tail $File_Anno] and $ID is seen multiple times."
	}
	# On duplicates the last occurrence wins.
	set g_ExtAnnotation($File_Anno,$ID) [join [lrange $SLine 1 end] "\t"]
    }
    close $F

    set HeaderCols [split $g_ExtAnnotation($File_Anno,Header) "\t"]
    puts "\t...[llength $g_ExtAnnotation($File_Anno,L_ID)] identifiers ([lindex $HeaderCols 0]) and [llength $HeaderCols] annotations columns ([join $HeaderCols ","])."

    # The requested key may have just been loaded.
    if {[info exists g_ExtAnnotation($What)]} {return $g_ExtAnnotation($What)}
    return ""
}

##
## For a given variant ID, compute the "familyBarcode", "barcode" and "stat" values.
## 
proc findBarcodesAndStatFor {ID} {

    # Builds, for the variant ID, the per-sample genotype data, the barcodes
    # and the read-depth statistics over all samples.
    #
    # Barcode: one digit per sample, in the order families and samples are
    # walked through g_lPatientsOf:
    #   0  no "hom"/"het" call (or no entry at all) for this sample
    #   1  heterozygous for the variation  ("het" or "het?")
    #   2  homozygous for the variation    ("hom" or "hom?")
    #
    # Side effects:
    #   g_famBarcode($fam)  reset, then filled with the barcode of each family
    #   g_perso($patient)   "zygosity<TAB>depth<TAB>reads<TAB>%reads<TAB>qual"
    #                       for every sample found in g_vcfINFOS($ID)
    #   g_Statistics(all,Null|Het|Hom|Both) and g_Statistics(all) incremented
    #
    # Returns the list:
    #   barcode HomCount HetCount alleleCount sampleCount
    #   Mean_SNVDepth SD_SNVDepth Counts_SNVDepth
    #   Mean_TotalDepth SD_TotalDepth Counts_TotalDepth
    # Mean_SNVDepth and SD_SNVDepth are always -1 (that statistic is disabled).
    #
    # 2014/06/09 Compute the statistics for coverage for all patients for one SNV

    global g_lPatientsOf g_vcfINFOS g_famBarcode
    global g_allPatients g_perso g_Statistics

    # Family barcodes are rebuilt from scratch for each variant.
    if {[info exists g_famBarcode]} {unset g_famBarcode}

    set totalDepths   {}
    set variantDepths {}

    # Pass 1: pick up "sample:zygosity:depth:reads:qual" for every sample.
    foreach family [array names g_lPatientsOf] {
	foreach sample $g_lPatientsOf($family) {
	    if {![regexp "$sample:(\[^: \]+):(\[^: \]+):(\[^: \]+):(\[^: \]+)" $g_vcfINFOS($ID) -> genotype depth read qual]} {
		continue
	    }

	    # Depth of coverage: keep only values starting with a digit.
	    if {$depth!="NA" && [regexp {^[0-9]} $depth]} {lappend totalDepths   $depth}
	    if {$read !="NA" && [regexp {^[0-9]} $read]}  {lappend variantDepths $read}

	    # Percentage of reads carrying the variation, NA when it cannot be computed.
	    if {[regexp "\[^0-9\]" $read] || [regexp "\[^0-9\]" $depth] || $depth == "0"} {
		set ratio "NA"
	    } else {
		set ratio [format "%.0f" [expr {$read*100.0/$depth}]]
	    }

	    set g_perso($sample) "$genotype\t$depth\t$read\t$ratio\t$qual"
	    set zygous($sample)  $genotype
	}
    }

    # Total depth: mean and standard deviation need at least 2 values.
    # A single value is its own mean; no value at all leaves the mean at -1.
    set depthCount [llength $totalDepths]
    set meanDepth  "-1"
    set sdDepth    "0"
    if {$depthCount > 1 && $totalDepths != 0 && $totalDepths != {}} {
	set stats     [BasicStatistics $totalDepths]
	set meanDepth [format "%.0f" [lindex $stats 0]]
	set sdDepth   [format "%.0f" [lindex $stats 2]]
    } elseif {$totalDepths != {}} {
	set meanDepth [format "%.0f" $totalDepths]
    }

    # Variant depth: only the number of values is reported, mean and SD are
    # not computed and stay at -1.
    set variantDepthCount [llength $variantDepths]

    #JEAN 2013/09/03 Not yet applied Modifying to let the NA status to get in replaced by the *

    # Pass 2: barcodes and genotype counts.
    set barcode  ""
    set homCount 0
    set hetCount 0
    foreach family [array names g_lPatientsOf] {
	set g_famBarcode($family) ""
	foreach sample $g_lPatientsOf($family) {
	    set digit "0"
	    if {[info exists zygous($sample)]} {
		switch -exact -- $zygous($sample) {
		    hom - hom? {set digit "2"; incr homCount}
		    het - het? {set digit "1"; incr hetCount}
		}
	    }
	    append g_famBarcode($family) $digit
	    append barcode $digit
	}
    }

    # alleleCount is the allele count (homozygous count as 2)
    set alleleCount [expr {(2*$homCount)+$hetCount}]
    set sampleCount [llength $g_allPatients]

    # Global statistics
    if {$alleleCount == 0} {
	set category Null
    } elseif {$homCount == 0} {
	set category Het
    } elseif {$hetCount == 0} {
	set category Hom
    } else {
	set category Both
    }
    incr g_Statistics(all,$category)
    incr g_Statistics(all)

    return [list $barcode $homCount $hetCount $alleleCount $sampleCount "-1" "-1" $variantDepthCount $meanDepth $sdDepth $depthCount]
}

##
## Ranking by variant for all the variants, creation of 1 output file by patient.
## No filter applied on these output files.
##
## OUTPUTS: g_VaRank(vcfDir)/"family"_"patient"_allVariants.rankingByVar.tsv (1 by patient)
##
proc writeAllVariantsRankingByVar {} {

    # Writes, for every patient of every family, the file
    #   g_VaRank(vcfDir)/<family>_<patient>_allVariants.rankingByVar.tsv
    # holding one line per variant observed in that patient:
    #   1. variants analysed by Alamut, in the order given by g_lScore
    #      (AlamutAnalysis = "yes");
    #   2. variants of g_vcfINFOS(L_IDs) without Alamut data
    #      (AlamutAnalysis = "no", annotation columns set to NA / -1).
    # Nothing is done when all the output files already exist.
    #
    # Side effects:
    #   - g_Statistics(All,*) and g_Statistics($patient,*) counters are updated;
    #   - g_famBarcode and g_perso are updated through findBarcodesAndStatFor,
    #     g_perso is emptied before returning.
    #
    # Returns nothing.

    global g_vcfINFOS_Supp
    global g_vcfINFOS
    global g_VaRank
    global g_ALAMUT
    global g_lScore
    global g_allPatients
    global g_lPatientsOf
    global g_famBarcode
    global g_deltaMES
    global g_deltaSSF
    global g_deltaNNS
    global g_perso
    global g_Statistics

    puts "...writing output files: all variants, ranking by var ([clock format [clock seconds] -format "%B %d %Y - %H:%M"])"

    ## Checking if all these output files already exist:
    ####################################################
    set allFilesExist 1
    foreach fam [array names g_lPatientsOf] {
	foreach patient $g_lPatientsOf($fam) {
	    set outputfile "$g_VaRank(vcfDir)/[set fam]_[set patient]_allVariants.rankingByVar.tsv"
	    if {![file exists $outputfile]} {set allFilesExist 0; break}
	}
    }
    if {$allFilesExist} {
	puts "\t...\"*_allVariants.rankingByVar\" already exist, continue"
	return
    }

    ## Define the HeadLine (HL) foreach output:
    ###########################################
    #Headlines available in Alamut_HT 2013/03/18
    #id gene geneId chrom transcript strand transLen protein Uniprot varType codingEffect varLocation assembly gDNAstart gDNAend gNomen cDNAstart cDNAend cNomen pNomen alt_pNomen exon intron omimId distNearestSS nearestSSType wtSSFScore wtMaxEntScore wtNNSScore wtGSScore wtHSFScore varSSFScore varMaxEntScore varNNSScore varGSScore varHSFScore nearestSSChange localSpliceEffect proteinDomain1 proteinDomain2 proteinDomain3 proteinDomain4 rsId rsValidated rsSuspect rsValidations rsValidationNumber rsAncestralAllele rsHeterozygosity rsClinicalSignificance rsMAF rsMAFAllele rsMAFCount espRefEACount espRefAACount espRefAllCount espAltEACount espAltAACount espAltAllCount espEAMAF espAAMAF espAllMAF espAvgReadDepth hgmdId hgmdPhenotype hgmdPubMedId insNucs delNucs substType wtNuc varNuc nucChange phastCons phyloP wtAA_1 wtAA_3 wtCodon wtCodonFreq varAA_1 varAA_3 varCodon varCodonFreq posAA nOrthos conservedOrthos conservedDistSpecies BLOSUM45 BLOSUM62 BLOSUM80 wtAAcomposition varAAcomposition wtAApolarity varAApolarity wtAAvolume varAAvolume granthamDist AGVGDclass AGVGDgv AGVGDgd SIFTprediction SIFTweight SIFTmedian MAPPprediction MAPPpValue MAPPpValueMedian PPH2class

    #2013/03/18 Jean Add the following Splice scores for variant  (varSSFScore, varMaxEntScore, varNNSScore)
    #2013/04/03 Jean Reorganized columns to better fit categories
    #2013/05/20 Jean Add the following HGMD features to test the HGMD database (hgmdId, hgmdPhenotype, hgmdPubMedId)
    #2013/09/25 Jean Add the ability to integrate the info column from the VCF data
    #2013/03/14 Jean Add the ability to integrate user external annotations

    foreach fam [array names g_lPatientsOf] {
	foreach patient $g_lPatientsOf($fam) {
	    set rajout1 "VariantID\tGene\tomimId\tTranscriptID\tTranscriptLength\tChr\tStart\tEnd\tRef\tMut\tUniprot\tprotein\tposAA\twtAA_1\tvarAA_1"
	    set g_perso(HL) "Zygosity\tTotalReadDepth\tVarReadDepth\t%Reads_variation\tQUAL_Phred"
	    set rajout2 "VarType\tCodingEffect\tVarLocation\tExon\tIntron\tgNomen\tcNomen\tpNomen"
	    set rajout3 "hgmdID\thgmdPhenotype\thgmdPubMedId\trsID\trsValidation\trsClinicalSignificance\trsAncestralAllele\trsHeterozygosity\trsMAF\trsMAFAllele\trsMAFCount"
	    set rajout4 "espRefEACount\tespRefAACount\tespRefAllCount\tespAltEACount\tespAltAACount\tespAltAllCount\tespEAMAF\tespAAMAF\tespAllMAF\tespAvgReadDepth"
	    set rajout5 "delta MESscore (%)\twtMEScore\tvarMEScore\tdelta SSFscore (%)\twtSSFScore\tvarSSFScore\tdelta NNSscore (%)\twtNNSScore\tvarNNSScore\tDistNearestSS\tNearestSS\tlocalSpliceEffect"
	    set rajout6 "SiftPred\tSiftWeight\tSiftMedian\tPPH2pred\tphyloP\tPhastCons\tGranthamDist"
	    set rajout7 "VaRank_VarScore\tAlamutAnalysis\tAvg_TotalDepth\tSD_TotalDepth\tCount_TotalDepth\tfamilyBarcode\tBarcode\tHom_Count\tHet_Count\tAllele_Count\tSample_Count"

	    # The two "##" lines give the sample order used by the barcodes.
	    set    RankingText($patient) "## Barcode: $g_allPatients"
	    append RankingText($patient) "\n## FamilyBarcode: $g_lPatientsOf($fam)"

	    if {[info exists g_vcfINFOS_Supp(Header)] && [set g_vcfINFOS_Supp(Header)] != {}} {
		set rajout8 "[join [set g_vcfINFOS_Supp(Header)] "\t"]"

		append RankingText($patient) "\n$rajout1\t$g_perso(HL)\t$rajout2\t$rajout3\t$rajout4\t$rajout5\t$rajout6\t$rajout7\t$rajout8"
	    } else {
		append RankingText($patient) "\n$rajout1\t$g_perso(HL)\t$rajout2\t$rajout3\t$rajout4\t$rajout5\t$rajout6\t$rajout7"
	    }
	    if {[info exists g_VaRank(extann)] && $g_VaRank(extann) != ""} {
		# First call loads the annotation file(s), then one header per file is appended.
		ExternalAnnotations
		foreach F [ExternalAnnotations L_Files] {
		    append RankingText($patient) "\t[ExternalAnnotations $F Header]"
		}
	    }
	}
    }

    ## Search column numbers in the HEADER to parse in g_ALAMUT
    ###########################################################
    # Each pair is {name of the local variable receiving the column index,
    # Alamut column name}. The value itself is later stored in the variable
    # named like the index variable without its "i_" prefix (i_gene -> gene).
    set HEADER [split $g_ALAMUT(#id) "\t"]
    set lValRajout1 [list {i_gene gene} {i_omimId omimId} {i_transcript transcript} {i_transLen transLen} {i_uniprot Uniprot} {i_refseqp protein} {i_pos posAA} {i_AA wtAA_1} {i_var varAA_1}]

    set lValRajout2 [list {i_varType varType} {i_effect codingEffect} {i_varloc varLocation} {i_exon exon} {i_intron intron} {i_gnomen gNomen} {i_cnomen cNomen} {i_pnomen pNomen}]

    set lValRajout3 [list {i_hgmdId hgmdId} {i_hgmdPhen hgmdPhenotype} {i_hgmdPub hgmdPubMedId} {i_rsId rsId} {i_rsVal rsValidations} {i_rsClin rsClinicalSignificance} {i_rsAnc rsAncestralAllele} {i_rsHtz rsHeterozygosity} {i_rsMAF rsMAF} {i_rsMAFAllele rsMAFAllele} {i_rsMAFCount rsMAFCount}]

    set lValRajout4 [list {i_espRefEACount espRefEACount} {i_espRefAACount espRefAACount} {i_espRefAllCount espRefAllCount} {i_espAltEACount espAltEACount} {i_espAltAACount espAltAACount} {i_espAltAllCount espAltAllCount} {i_espEAMAF espEAMAF} {i_espAAMAF espAAMAF} {i_espAllMAF espAllMAF} {i_espAvgReadDepth espAvgReadDepth}]

    set lValRajout5 [list {i_wtMaxEntScore wtMaxEntScore} {i_varMaxEntScore varMaxEntScore} {i_wtSSFScore wtSSFScore} {i_varSSFScore varSSFScore} {i_wtNNSScore wtNNSScore} {i_varNNSScore varNNSScore} {i_distSS distNearestSS} {i_SStype nearestSSType} {i_SplEff localSpliceEffect}]

    set lValRajout6 [list {i_SIFTp SIFTprediction} {i_SIFTw SIFTweight} {i_SIFTm SIFTmedian} {i_pph2 PPH2class} {i_phyloP phyloP} {i_phastCons phastCons} {i_grantham granthamDist}]

    set lVal "$lValRajout1 $lValRajout2 $lValRajout3 $lValRajout4 $lValRajout5 $lValRajout6"
    foreach val $lVal {
	set iCol    [lindex $val 0]
	set colName [lindex $val 1]

	# NOTE(review): prefix match, the first header column starting with
	# $colName is taken (e.g. "rsMAF" must come before "rsMAFAllele").
	# A missing column is only reported, its values are set to NA below.
	set $iCol [lsearch -regexp $HEADER "^$colName"]; if {[set $iCol] == -1} {puts "column number not found for $colName - should Exit"}
    }

    #Alamut coding effect useful for statistics
    set L_codingEffect [list synonymous missense nonsense In-frame Frameshift startloss stoploss]
    set L_varLocation  [list intron upstream "5'UTR" "3'UTR" downstream "splice site"]

    ## Downloading genetic variants analysed by alamut.
    ## AlamutAnalysis = "yes"
    ## g_lScore is sorted in descending order of scores (with no more redundancy).
    ##############################################################################
    puts "\t...organizing ranking output from alamut data ([llength $g_lScore] scores)"

    foreach duoIDscore $g_lScore {
	set ID [lindex $duoIDscore 0]
	set L  [split $g_ALAMUT($ID) "\t"]

	# Extract each Alamut value into its own local variable, spaces removed.
	foreach val $lVal {
	    set iCol [lindex $val 0]

	    regsub "^i_" $iCol "" colValue
	    # $iCol is the NAME of the index variable: its content is tested.
	    if {[set $iCol] == -1} {
		# Column absent from the Alamut header
		set $colValue NA
	    } else {
		regsub -all " " [lindex $L [set $iCol]] "" $colValue
	    }
	}

	#UPDATE Variation is computed but no patient has it... to be done in line with the alamut input file checking
	#if {![info exists $g_vcfINFOS($ID)]} {continue}

	# If rsID is given by VCF, we can keep the rsID and the rsValidated informations
	if {[set g_VaRank(rsFromVCF)]=="yes"} {

	    set rsIdVCF [lindex $g_vcfINFOS($ID) 4]

	    #Rs from VCF is GOOD
	    if {![isNotAnRS $rsIdVCF]} {
		set rsId  $rsIdVCF
		set rsVal [lindex $g_vcfINFOS($ID) 5]
	    } else {
		# rsID from vcf is not a good rsID testing from Alamut
		if {[isNotAnRS $rsId]} {
		    set rsId "NA"; set rsVal "NA"
		}
	    }
	}

	# rajout7:
	set score [lindex $duoIDscore 1]

	#Converting empty frequencies into -1 allow to filter for below 0.01
	foreach V [list rsMAF espEAMAF espAAMAF espAllMAF] {
	    if {[set $V]=="NA"} {set $V "-1";continue}

	    #Change also metrics to "." to ","
	    if {[set g_VaRank(metrics)]=="fr"} {regsub {\.} [set $V] "," $V}
	}

	set start [lindex $g_vcfINFOS($ID) 1]
	set ref   [lindex $g_vcfINFOS($ID) 2]
	set altn  [lindex $g_vcfINFOS($ID) 3]
	set end   [expr {$start+[string length $ref]-1}]

	set rajout1 "$ID\t$gene\t$omimId\t$transcript\t$transLen\t[lindex $g_vcfINFOS($ID) 0]\t$start\t$end\t$ref\t$altn\t$uniprot\t$refseqp\t$pos\t$AA\t$var"
	set rajout2 "$varType\t$effect\t$varloc\t$exon\t$intron\t$gnomen\t$cnomen\t$pnomen"
	set rajout3 "$hgmdId\t$hgmdPhen\t$hgmdPub\t$rsId\t$rsVal\t$rsClin\t$rsAnc\t$rsHtz\t$rsMAF\t$rsMAFAllele\t$rsMAFCount"
	set rajout4 "$espRefEACount\t$espRefAACount\t$espRefAllCount\t$espAltEACount\t$espAltAACount\t$espAltAllCount\t$espEAMAF\t$espAAMAF\t$espAllMAF\t$espAvgReadDepth"
	set rajout5 "$g_deltaMES($ID,$transcript)\t$wtMaxEntScore\t$varMaxEntScore\t$g_deltaSSF($ID,$transcript)\t$wtSSFScore\t$varSSFScore\t$g_deltaNNS($ID,$transcript)\t$wtNNSScore\t$varNNSScore\t$distSS\t$SStype\t$SplEff"
	set rajout6 "$SIFTp\t$SIFTw\t$SIFTm\t$pph2\t$phyloP\t$phastCons\t$grantham"

	if {[info exists g_VaRank(DEBUG)]} {puts "$ID $transcript : $g_deltaMES($ID,$transcript)\t$varMaxEntScore\t$g_deltaSSF($ID,$transcript)"}

	## rajout7:
	# Also fills g_perso($patient) and g_famBarcode($fam) for this variant.
	set infos       [findBarcodesAndStatFor $ID]
	set barcode     [lindex $infos 0]
	set HomCount    [lindex $infos 1]
	set HetCount    [lindex $infos 2]
	set allCount    [lindex $infos 3]
	set sampleCount [lindex $infos 4]

	# Elements 5 to 7 (Mean, SD and Counts of SNVDepth) are not used here.
	set Mean_TotalDepth   [lindex $infos 8]
	set SD_TotalDepth     [lindex $infos 9]
	set Counts_TotalDepth [lindex $infos 10]

	#Collecting data for the global statistics
	#
	if {![info exists g_Statistics(All,byeffect)]} {
	    foreach e [concat $L_codingEffect $L_varLocation "unknown"] {
		set g_Statistics(All,$e)   0
	    }
	    set g_Statistics(All,byeffect) 0
	}

	incr g_Statistics(All,byeffect)

	#Counting for coding effect, or for the variant location when there is no coding effect
	#
	if {$effect!="NA" && $effect!=""} {
	    incr g_Statistics(All,$effect)
	} else {
	    if {$varloc!="NA" && $varloc!=""} {
		incr g_Statistics(All,$varloc)
	    }
	}

	foreach fam [array names g_lPatientsOf] {
	    foreach patient $g_lPatientsOf($fam) {

		# Only the patients carrying this variant get a line.
		if {![info exists g_vcfINFOS($ID,$patient)]} {continue}

		set rajout7 "$score\tyes\t$Mean_TotalDepth\t$SD_TotalDepth\t$Counts_TotalDepth\t'$g_famBarcode($fam)'\t'$barcode'\t$HomCount\t$HetCount\t$allCount\t$sampleCount"

		#Collecting data for the statistics per patient
		#
		if {![info exists g_Statistics($patient)]} {
		    foreach e [concat $L_codingEffect $L_varLocation "unknown"] {
			set g_Statistics($patient,$e)      0
			set g_Statistics($patient,$e,Hom)  0
			set g_Statistics($patient,$e,Het)  0
			set g_Statistics($patient,$e,Null) 0
		    }
		    set g_Statistics($patient) 0
		}

		incr g_Statistics($patient)

		# Zygosity is the first field of g_perso($patient)
		set HomHet [lindex [split $g_perso($patient) "\t"] 0]

		#Counting for coding effect
		#
		if {$effect!="NA" && $effect!=""} {
		    incr g_Statistics($patient,$effect)
		    if {[regexp "hom" $HomHet]} {
			incr g_Statistics($patient,$effect,Hom)
		    } elseif {[regexp "het" $HomHet]} {
			incr g_Statistics($patient,$effect,Het)
		    } else {
			incr g_Statistics($patient,$effect,Null)
		    }
		} else {
		    if {$varloc!="NA" && $varloc!=""} {
			incr g_Statistics($patient,$varloc)
			if {[regexp "hom" $HomHet]} {
			    incr g_Statistics($patient,$varloc,Hom)
			} elseif {[regexp "het" $HomHet]} {
			    incr g_Statistics($patient,$varloc,Het)
			} else {
			    incr g_Statistics($patient,$varloc,Null)
			}
		    }
		}

		## rajout8: VCF INFO values, in the order of g_vcfINFOS_Supp(Header)
		set l_headers_ID {}
		set l_data_ID    {}

		set rajout8 {}

		if {[set g_VaRank(vcfInfo)]=="yes"} {
		    set l_Infos_VCF [set g_vcfINFOS_Supp($ID,$patient)]

		    foreach infos_VCF $l_Infos_VCF {
			if {[regexp "=" $infos_VCF]} {
			    # "key=value" entry
			    set l_Header_Infos_VCF [split $infos_VCF "="]

			    set Header [lindex $l_Header_Infos_VCF 0]
			    set Data   [lindex $l_Header_Infos_VCF 1]
			} else {
			    # Flag without value: the flag name is used as data too
			    set Header $infos_VCF
			    set Data   $infos_VCF
			}

			lappend l_headers_ID $Header
			lappend l_data_ID    $Data
		    }

		    foreach NewHeaders_VCF [set g_vcfINFOS_Supp(Header)] {
			set  i_header [lsearch -exact  $l_headers_ID $NewHeaders_VCF]
			if {$i_header == -1} {lappend rajout8 "NA"} else {lappend rajout8 [lindex $l_data_ID $i_header]}
		    }
		    set rajout8 [join $rajout8 "\t"]
		}

		if {$rajout8!={}} {
		    append RankingText($patient) "\n$rajout1\t$g_perso($patient)\t$rajout2\t$rajout3\t$rajout4\t$rajout5\t$rajout6\t$rajout7\t$rajout8"
		} else {
		    append RankingText($patient) "\n$rajout1\t$g_perso($patient)\t$rajout2\t$rajout3\t$rajout4\t$rajout5\t$rajout6\t$rajout7"
		}
		#Adding external user annotations at the end of the output files
		if {[info exists g_VaRank(extann)] && $g_VaRank(extann) != ""} {

		    foreach F [ExternalAnnotations L_Files] {

			set AnnotFound 0

			# $gene may hold several gene names separated by "/":
			# the first one with an annotation is used.
			foreach g [split $gene "/"] {
			    if {[ExternalAnnotations $F $g]!=""} {
				append RankingText($patient) "\t$g\t[ExternalAnnotations $F $g]"
				set AnnotFound 1
				break
			    } else {
				# No exact match: $g is searched as a regexp among the identifiers of the file
				set  i_Searched_gene [lsearch -regexp [ExternalAnnotations $F L_ID] $g]
				if {$i_Searched_gene != -1} {
				    set gene_tmp [lindex [ExternalAnnotations $F L_ID] $i_Searched_gene]
				    append RankingText($patient) "\t$gene_tmp\t[ExternalAnnotations $F $gene_tmp]"
				    set AnnotFound 1
				    break
				}
			    }
			}

			# No annotation at all: fill every column of this file with NA
			if {$AnnotFound==0} {
			    set NbHeader [llength [split [ExternalAnnotations $F Header] "\t"]]
			    append RankingText($patient) "\t[join [lrepeat $NbHeader NA] "\t"]"
			}
		    }
		}
	    }
	}
    }

    ## Downloading genetic variants not analysed by alamut.
    ## AlamutAnalysis = "no"
    #######################################################
    puts "\t...organizing ranking output from data not analysed by alamut"

    foreach ID [set g_vcfINFOS(L_IDs)] {
	if {[info exists g_ALAMUT($ID)]} {continue}
	set chrom [lindex $g_vcfINFOS($ID) 0]
	set start [lindex $g_vcfINFOS($ID) 1]
	set ref   [lindex $g_vcfINFOS($ID) 2]
	set altn  [lindex $g_vcfINFOS($ID) 3]
	set end   [expr {$start+[string length $ref]-1}]

	# If rsID is given by VCF, we can keep the rsID and the rsValidated informations
	if {[set g_VaRank(rsFromVCF)]=="yes"} {

	    set rsIdVCF [lindex $g_vcfINFOS($ID) 4]

	    #rs from VCF is GOOD
	    if {![isNotAnRS $rsIdVCF]} {
		set rsId  $rsIdVCF
		set rsVal [lindex $g_vcfINFOS($ID) 5]
	    } else {
		set rsId "NA"; set rsVal "NA"
	    }
	} else {
	    set rsId "NA"; set rsVal "NA"
	}

	# Also fills g_perso($patient) and g_famBarcode($fam) for this variant.
	set infos       [findBarcodesAndStatFor $ID]
	set barcode     [lindex $infos 0]
	set HomCount    [lindex $infos 1]
	set HetCount    [lindex $infos 2]
	set allCount    [lindex $infos 3]
	set sampleCount [lindex $infos 4]

	# Elements 5 to 7 (Mean, SD and Counts of SNVDepth) are not used here.
	set Mean_TotalDepth   [lindex $infos 8]
	set SD_TotalDepth     [lindex $infos 9]
	set Counts_TotalDepth [lindex $infos 10]

	#Collecting data for the global statistics
	#
	if {![info exists g_Statistics(All,byeffect)]} {
	    foreach e [concat $L_codingEffect $L_varLocation "unknown"] {
		set g_Statistics(All,$e)   0
	    }
	    set g_Statistics(All,byeffect) 0
	}

	incr g_Statistics(All,byeffect)
	incr g_Statistics(All,unknown)

	foreach fam [array names g_lPatientsOf] {
	    foreach patient $g_lPatientsOf($fam) {

		# Only the patients carrying this variant get a line.
		if {![info exists g_vcfINFOS($ID,$patient)]} {continue}

		# Same column layout as for the Alamut variants, annotations set to NA
		# and frequencies set to -1.
		append RankingText($patient) "\n$ID\tNA\tNA\tNA\tNA\t$chrom\t$start\t$end\t$ref\t$altn\tNA\tNA\tNA\tNA\tNA" ; #rajout1
		append RankingText($patient) "\t$g_perso($patient)"
		append RankingText($patient) "\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA" ; #rajout2
		append RankingText($patient) "\tNA\tNA\tNA\t$rsId\t$rsVal\tNA\tNA\tNA\t-1\tNA\tNA" ; #rajout3
		append RankingText($patient) "\tNA\tNA\tNA\tNA\tNA\tNA\t-1\t-1\t-1\tNA" ; #rajout4
		append RankingText($patient) "\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA" ; #rajout5
		append RankingText($patient) "\tNA\tNA\tNA\tNA\tNA\tNA\tNA" ; #rajout6
		append RankingText($patient) "\t0\tno\t$Mean_TotalDepth\t$SD_TotalDepth\t$Counts_TotalDepth\t'$g_famBarcode($fam)'\t'$barcode'\t$HomCount\t$HetCount\t$allCount\t$sampleCount";#rajout7

		#Collecting data for the statistics per patient
		# (same set of counters as for the Alamut variants)
		#
		if {![info exists g_Statistics($patient)]} {
		    foreach e [concat $L_codingEffect $L_varLocation "unknown"] {
			set g_Statistics($patient,$e)      0
			set g_Statistics($patient,$e,Hom)  0
			set g_Statistics($patient,$e,Het)  0
			set g_Statistics($patient,$e,Null) 0
		    }
		    set g_Statistics($patient) 0
		}
		incr g_Statistics($patient)

		incr g_Statistics($patient,unknown)

		# Zygosity is the first field of g_perso($patient)
		set HomHet [lindex [split $g_perso($patient) "\t"] 0]

		if {[regexp "hom" $HomHet]} {
		    incr g_Statistics($patient,unknown,Hom)
		} elseif {[regexp "het" $HomHet]} {
		    incr g_Statistics($patient,unknown,Het)
		} else {
		    incr g_Statistics($patient,unknown,Null)
		}

		## rajout8: VCF INFO values, in the order of g_vcfINFOS_Supp(Header)
		set l_headers_ID {}
		set l_data_ID    {}

		set rajout8 {}

		if {[set g_VaRank(vcfInfo)]=="yes"} {
		    set l_Infos_VCF [set g_vcfINFOS_Supp($ID,$patient)]

		    foreach infos_VCF $l_Infos_VCF {
			if {[regexp "=" $infos_VCF]} {
			    # "key=value" entry
			    set l_Header_Infos_VCF [split $infos_VCF "="]

			    set Header [lindex $l_Header_Infos_VCF 0]
			    set Data   [lindex $l_Header_Infos_VCF 1]
			} else {
			    # Flag without value: the flag name is used as data too
			    set Header $infos_VCF
			    set Data   $infos_VCF
			}

			lappend l_headers_ID $Header
			lappend l_data_ID    $Data
		    }

		    foreach NewHeaders_VCF [set g_vcfINFOS_Supp(Header)] {
			set  i_header [lsearch -exact  $l_headers_ID $NewHeaders_VCF]
			if {$i_header == -1} {lappend rajout8 "NA"} else {lappend rajout8 [lindex $l_data_ID $i_header]}
		    }
		    set  rajout8 [join $rajout8 "\t"]
		    if {$rajout8!={}} {
			append RankingText($patient) "\t$rajout8"
		    }
		}
		#Adding external user annotations at the end of the output files:
		#no gene is known here, every column of every file is filled with NA
		if {[info exists g_VaRank(extann)] && $g_VaRank(extann) != ""} {
		    foreach F [ExternalAnnotations L_Files] {
			set NbHeader [llength [split [ExternalAnnotations $F Header] "\t"]]
			append RankingText($patient) "\t[join [lrepeat $NbHeader NA] "\t"]"
		    }
		}
	    }
	}
    }

    ## Writing "*_allVariants.rankingByVar" output files
    ####################################################
    puts "\t...writing \"*_allVariants.rankingByVar\" output files"
    foreach fam [array names g_lPatientsOf] {
	foreach patient $g_lPatientsOf($fam) {
	    set outputfile "$g_VaRank(vcfDir)/[set fam]_[set patient]_allVariants.rankingByVar.tsv"
	    ReplaceTextInFile "$RankingText($patient)" $outputfile
	}
    }

    ## Cleaning
    ###########
    if {[info exists g_perso]} {array unset g_perso "*"}

    return
}

##
## Ranking by gene for all variants: one output file is created per patient.
## No filter is applied to these output files.
##
## OUTPUTS: g_VaRank(vcfDir)/"family"_"patient"_allVariants.rankingByGene.tsv (1 by patient)
##

proc writeAllVariantsRankingByGene {} {

    ## Writes one "<family>_<patient>_allVariants.rankingByGene.tsv" file per patient
    ## in g_VaRank(vcfDir), built from the matching "*_allVariants.rankingByVar.tsv" file.
    ##
    ## Steps:
    ##   1. Return immediately if every output file already exists.
    ##   2. Read each patient's rankingByVar file and group "id score" pairs by gene.
    ##   3. Compute one score per (patient, gene) and sort the genes by that score.
    ##   4. Rewrite the rankingByVar lines, grouped gene by gene in that order.
    ##
    ## Globals read: g_VaRank(vcfDir), g_lPatientsOf, g_allPatients, g_vcfINFOS.
    ## Takes no argument and returns nothing.

    global g_VaRank
    global g_lPatientsOf
    global g_allPatients
    global g_vcfINFOS

    puts "...writing output files: all variants, ranking by gene ([clock format [clock seconds] -format "%B %d %Y - %H:%M"])"

    ## Checking if all these output files already exist
    ###################################################
    set allFilesExist 1
    foreach fam [array names g_lPatientsOf] {
	foreach patient $g_lPatientsOf($fam) {
	    set outputfile "$g_VaRank(vcfDir)/[set fam]_[set patient]_allVariants.rankingByGene.tsv"
	    if {![file exists $outputfile]} {set allFilesExist 0; break}
	}
    }
    if {$allFilesExist} {
	puts "\t...\"*_allVariants.rankingByGene\" already exist, continue"
	return
    }
    
    #
    #JEAN TO BE REORGANIZED TAKING TOO MUCH MEMORY AND TIME, SHOULD BE SEPARATED INTO ONE BY ONE
    #

    ## Searches for all htz and hom SNV, for each patient and for each gene:
    ## --> creation of lVariants($pat,$gene) variables
    ## --> creation of lGenes and lID variables
    ##########################################################################
    set lGenes {}
    set lID    {}
    puts "\t...searching for all variants ([clock format [clock seconds] -format "%B %d %Y - %H:%M"])"
    foreach fam [array names g_lPatientsOf] {
	foreach patient $g_lPatientsOf($fam) {
	    set rankFile "$g_VaRank(vcfDir)/[set fam]_[set patient]_allVariants.rankingByVar.tsv"
	    if {![file exists $rankFile]} {
		puts "WARNING: $rankFile doesn't exist."
		continue
	    }

	    foreach L [LinesFromFile $rankFile] {
		if {$L == ""} {continue}
		## The "## Barcode:" line and the column header line are kept to be
		## reproduced at the top of the rankingByGene output files.
		if {[regexp "^## Barcode: (.*)" $L match allExomes]} {set textAllEx "$L"; continue}
		if {[regexp "^##" $L]} {continue}
		if {[regexp "^VariantID" $L]} {
		    set HeaderText "$L"
		    set L [split $L "\t"]
		    set i_gene  [lsearch -regexp $L "^Gene$"];            if {$i_gene  == -1} {puts "Gene: column number not found - Exit"; exit}
		    set i_score [lsearch -regexp $L "^VaRank_VarScore$"]; if {$i_score == -1} {puts "VaRank_VarScore: column number not found - Exit"; exit}
		    continue
		}
		set L [split $L "\t"]
		regsub -all " " [lindex $L 0]       "" id
		regsub -all " " [lindex $L $i_gene] "" gene
		if {$gene == "NA"} {continue}
		## In case of value like: gene = "MC1R/TUBB3", each gene has to be treated separately.
		## (Else, bug to assemble variants from gene = "MC1R/TUBB3" with variants from gene = "MC1R")
		## => but, by this way, we introduce redundancy in "*allVariants.rankingByGene.tsv" files.
		foreach gene [split $gene "/"] {
		    ## Tab() is used as a "seen" set (for both genes and IDs) to avoid costly lsearch calls.
		    if {![info exists Tab($gene)]} {set Tab($gene) 1;lappend lGenes $gene}
		    if {![info exists Tab($id)]}   {set Tab($id)   1;lappend lID    $id}

		    #if {[lsearch -exact $lID "$id"]      == -1} {lappend lID $id}
		    #if {[lsearch -exact $lGenes "$gene"] == -1} {lappend lGenes $gene}
		    regsub -all " " [lindex $L $i_score] "" score
		    
		    lappend lVariants($patient,$gene) "$id $score"
		}
	    }
	}
    }

    ## Calculate the score for each gene:
    ## - If the most deleterious variant is hom ("scoreMDHom"), gene score = "scoreMDhom" x 2
    ## - If the most deleterious variant is het ("scoreMDHet"): 
    ##		- if there is another variant in the same gene, gene score = "scoreMDHet" + "following score"
    ##		- if there isn't another variant in the same gene, gene score = "scoreMDHet" x 2
    ##
    ## --> creation of the variable lVariantsRankingByGene($patient)
    ###########################################################################################################
    puts "\t...scoring of each gene  ([clock format [clock seconds] -format "%B %d %Y - %H:%M"])"
    foreach patient $g_allPatients {

	foreach gene $lGenes {
	    if {![info exists lVariants($patient,$gene)]} {continue}
	    set lVariants($patient,$gene) [lsort -command DescendingSortOnElement1 $lVariants($patient,$gene)]
	    ## "liste" = the variant IDs of this gene, in the sorted order.
	    set liste {}
	    foreach duoIDScore $lVariants($patient,$gene) {
		set id [lindex $duoIDScore 0]
		lappend liste $id
	    }
	    set maxScore1 [lindex [lindex $lVariants($patient,$gene) 0] 1]
	    if {[llength $lVariants($patient,$gene)] == 1} {
		set BestDuoScore "[expr {$maxScore1*2}]"
	    } else {
		set bestID [lindex [lindex $lVariants($patient,$gene) 0] 0]
		## The zygosity of the first variant is read from the "<patient>:<value>:" field of g_vcfINFOS.
		if {[regexp "$patient:(\[^: \]+):" $g_vcfINFOS($bestID) match homhtz] && [regexp -nocase "hom" $homhtz]} {
		    set BestDuoScore "[expr {$maxScore1*2}]"
		} else {
		    set maxScore2 [lindex [lindex $lVariants($patient,$gene) 1] 1]
		    set BestDuoScore "[expr {$maxScore1+$maxScore2}]"
		}
	    }
	    lappend lVariantsRankingByGene($patient) "{$liste} $BestDuoScore"
	}
	if {[info exists lVariantsRankingByGene($patient)]} {
	    set lVariantsRankingByGene($patient) [lsort -command DescendingSortOnElement1 $lVariantsRankingByGene($patient)]
	}
    }

    #unset 
    #if {[info exist lVariantsRankingByGene]} {array unset lVariantsRankingByGene "*"}
    # unset lVariants ???

    ## Writing of the outputs
    #########################
    puts "\t...writing \"*_allVariants.rankingByGene.tsv\" output files ([clock format [clock seconds] -format "%B %d %Y - %H:%M"])"
    foreach fam [array names g_lPatientsOf] {
	foreach patient $g_lPatientsOf($fam) {
	    set outputfile "$g_VaRank(vcfDir)/[set fam]_[set patient]_allVariants.rankingByGene.tsv"
	    ReplaceTextInFile "$textAllEx\n## FamilyBarcode: $g_lPatientsOf($fam)\n$HeaderText" $outputfile
	    
	    if {![info exists lVariantsRankingByGene($patient)]} {continue}
	    
	    ## Downloading each ranking file line.
	    set  rankFile "$g_VaRank(vcfDir)/[set fam]_[set patient]_allVariants.rankingByVar.tsv"
	    ## The path is never an empty string: what has to be checked is that the file exists
	    ## (same check as in the first pass).
	    if {![file exists $rankFile]} {continue}

	    ## The lines of the previous patients are no longer needed: free them to limit memory usage.
	    array unset ligne

	    foreach L [LinesFromFile $rankFile] {
		## Split on tabs (as in the first pass): a raw line is not necessarily a valid Tcl list
		## (unbalanced braces or quotes) and the ID must be extracted the same way in both passes.
		regsub -all " " [lindex [split $L "\t"] 0] "" id
		
		if {[info exists Tab($id)]} {set ligne($patient,$id) $L}

		#if {[lsearch -exact $lID "$id"] != -1} {set ligne($patient,$id) $L}
	    }
	    
	    ## writing	
	    #set initTime [clock clicks -milliseconds]
	    #set EndTime  [clock clicks -milliseconds]
	    #puts "Time: [expr $EndTime - $initTime*1.0]"
		
	    foreach el $lVariantsRankingByGene($patient) {

		## The lines of a gene are buffered and written in batches to limit the number of file accesses.
		set i 0
		set L_Lines {}
		foreach id [lindex $el 0] {
		    incr i
		    
		    if {$i>10000} {
			## Flush the buffer BEFORE emptying it (else the buffered lines are lost).
			WriteTextInFile [join $L_Lines "\n"] $outputfile
			set L_Lines {}
			set i 0
		    }
		    lappend L_Lines "$ligne($patient,$id)"

		    #OLD WAY
		    #JEAN could be speed up by writing more than one line at a time
		    #WriteTextInFile "$ligne($patient,$id)" $outputfile
		}
		
		## Writing of the remaining lines of the gene
		if {$L_Lines != {}} {
		    WriteTextInFile [join $L_Lines "\n"] $outputfile
		    set L_Lines {}
		    set i 0
		}
	    }
	}
    }

    return
}

