##########################################################################
# VaRank 1.0                                                             #
#                                                                        #
# VaRank: a simple and powerful tool for ranking genetic variants        #
#                                                                        #
# Copyright (C) 2014 Veronique Geoffroy (veronique.geoffroy@inserm.fr)   # 
#                    Jean Muller (jeanmuller@unistra.fr)                 # 
#                                                                        #
# Please cite the following article:                                     #
#    XXX                                                                 #
#                                                                        #
# This is part of VaRank source code.                                    #
#                                                                        #
# This program is free software; you can redistribute it and/or          #
# modify it under the terms of the GNU General Public License            # 
# as published by the Free Software Foundation; either version 3         # 
# of the License, or (at your option) any later version.                 #
#                                                                        #
# This program is distributed in the hope that it will be useful,        # 
# but WITHOUT ANY WARRANTY; without even the implied warranty of         #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
# GNU General Public License for more details.                           #
#                                                                        #
# You should have received a copy of the GNU General Public License      #
# along with this program; If not, see <http://www.gnu.org/licenses/>.   #
##########################################################################

proc AssignID {chrom pos ref alt} {

    # Build the identifier of a variation: "chrom_pos_ref_alt".
    # The (pos, ref, alt) triple is first passed through SimplifyVariation,
    # and the three values it returns are the ones used in the identifier.

    set simplified [SimplifyVariation $pos $ref $alt]

    set fields [list $chrom]
    foreach idx {0 1 2} {
	lappend fields [lindex $simplified $idx]
    }

    return [join $fields "_"]
}

proc SimplifyVariation {pos ref alt} {

    # Reduce a variation (pos, ref, alt) to a shorter equivalent description.
    #
    # Arguments:
    #   pos : position of the first base of ref
    #   ref : reference allele
    #   alt : alternative allele
    # Returns:
    #   the string "newpos newref newalt" (callers read it with lindex).
    #   The input is returned unchanged whenever no rule below applies.
    #
    # SNV like...: pos 12345, ATGG > ATTG (length-ref = length-alt)
    # ==> Transformed with: pos 12347, G>T (removes the two identical ends)
    #
    # INS like... : pos 12345, CA > CATT (ref is a prefix of alt)
    # ==> Transformed with: pos 12346, A > ATT
    #
    # DEL like... : pos 12345, CATT > CA (alt is a prefix of ref)
    # ==> Transformed with: pos 12346, ATT > A
    #
    # No simplification for something like: TGCTGCTGCCAC > CTGCTGC
    #                                          -------
    #
    # No simplification if ref or alt > 400 NT
    #
    # Expected results:
    #   42456670 C > CTCTT ==> 42456670 C > CTCTT
    #   12345 A > G        ==> 12345 A > G
    #   12345 ATGG > ATTG  ==> 12347 G > T
    #   12345 CA > TTCA    ==> 12345 CA > TTCA
    #   12345 CA > CATT    ==> 12346 A > ATT
    #   12345 CATT > CA    ==> 12346 ATT > A
    #   12 GCCT > GC       ==> 13 CCT > C
    #   100 A > A          ==> 100 A > A   (identical alleles: position kept)
    #   100 AT > *         ==> 100 AT > *  (symbolic allele: compared literally)

    set lenref [string length $ref]
    set lenalt [string length $alt]

    if {$lenref > 400 || $lenalt > 400} {
        # Special case not treated:
        return "$pos $ref $alt"
    }

    set newref $ref
    set newalt $alt
    set newpos $pos

    if {$lenref == $lenalt} {
        ### Case of a SNV/MNV with identical ends to remove:
        # Locate the first and the last mismatching base. Everything outside
        # that window is common to both alleles and is dropped.
        set first -1
        set last  -1
        set i 0
        foreach NTref [split $ref ""] NTalt [split $alt ""] {
            if {![string equal $NTref $NTalt]} {
                if {$first == -1} {set first $i}
                set last $i
            }
            incr i
        }
        # Only rewrite when a mismatch exists. With identical alleles the
        # position must not be shifted.
        if {$first != -1} {
            set newpos [expr {$pos + $first}]
            set newref [string range $ref $first $last]
            set newalt [string range $alt $first $last]
        }
    } elseif {$lenref < $lenalt} {
        ### Case of an INS:
        # ref must be a non-empty literal prefix of alt. A plain string
        # comparison is used (not a regexp) so that characters such as
        # "." or "*" in an allele are never interpreted as a pattern.
        if {$lenref > 0 && [string equal -length $lenref $ref $alt]} {
            # Keep the last common base as anchor, followed by the inserted bases.
            set newref [string index $ref end]
            set newalt [string range $alt [expr {$lenref - 1}] end]
            set newpos [expr {$pos + $lenref - 1}]
        }
    } else {
        ### Case of a DEL:
        # alt must be a non-empty literal prefix of ref (same reasoning as above).
        if {$lenalt > 0 && [string equal -length $lenalt $alt $ref]} {
            # Keep the last common base as anchor, followed by the deleted bases.
            set newalt [string index $alt end]
            set newref [string range $ref [expr {$lenalt - 1}] end]
            set newpos [expr {$pos + $lenalt - 1}]
        }
    }

    return "$newpos $newref $newalt"
}



## Parsing of VCF input file(s).
## Output:
## 	2 global variables:
##	- g_allPatients = "patient1 patient2 ..." 
##	- g_vcfINFOS(ID) = "chrom pos ref alt rsID rsValidation patient1:homhet:dp:nr:qual patient2:homhet:dp:nr:qual ..."
##	  If ID is absent from patient1, then "patient1:homhet:dp:nr:qual" is not stored in memory.
##
##  Empty data (./.) and wild-type genotypes are filtered out for each patient individually.
##
proc parseVCFfiles {} {

    global g_VaRank
    global g_vcfINFOS
    global g_vcfINFOS_Supp
    global g_allPatients
    global g_lPatientsOf
    global g_Statistics

    #VCF tools
    #
    #http://vcftools.sourceforge.net/
    #

    #VCF files generated from Ion torrent platform Lille files contains
    #
    #GT:GQ:GL:DP:FDP:AD:APSD:AST:ABQV 
    #
    # "##FORMAT=<ID=AB,Number=1,Type=Float,Description=""Allele balance for each het genotype"">"           
    # "##FORMAT=<ID=ABQV,Number=.,Type=Integer,Description=""Allelic average base qv for the REF and ALT alleles in the order listed  in the ALT field"">"
    # "##FORMAT=<ID=AD,Number=.,Type=Integer,Description=""Allelic depths for the ref and alt alleles in the order listed"">"
    # "##FORMAT=<ID=APSD,Number=.,Type=Integer,Description=""Allelic specific plus strand depths for the REF and ALT alleles in the order listed in the ALT field"">"
    # "##FORMAT=<ID=AST,Number=.,Type=Integer,Description=""Allelic unique start positions for the REF and ALT alleles in the order listed in the ALT field"">"
    # "##FORMAT=<ID=DP,Number=1,Type=Integer,Description=""Total read depth per sample, including MQ0"">"               
    # "##FORMAT=<ID=FA,Number=.,Type=Float,Description=""Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample"">"
    # "##FORMAT=<ID=FDP,Number=1,Type=Integer,Description=""Filtered Read Depth"">"                                                                   
    # "##FORMAT=<ID=GL,Number=.,Type=String,Description=""Genotype Likelihood, number of values is (#ALT+1)*(#ALT+2)/2"">"
    # "##FORMAT=<ID=GQ,Number=1,Type=Float,Description=""Genotype Quality"">"
    # "##FORMAT=<ID=GT,Number=1,Type=String,Description=""Genotype"">" 
    # "##FORMAT=<ID=MQ0,Number=1,Type=Integer,Description=""Number of Mapping Quality Zero Reads per sample"">"
    # "##FORMAT=<ID=PL,Number=G,Type=Integer,Description=""Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"">"
    # #CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  IMLS_2013_1c
    # chr1    116243230       .       C       G       52.0    PASS    .       GT:GQ:GL:DP:FDP:AD:APSD:AST:ABQV        0/1:52.18:-4.22,-0.00,-10000.00:447:415:216,199:124,110:10,13:28,29
    
    #S6_IMLS_2013_4c_IonXpress_006_TSVC_variants.vcf then S8_IMLS_2013_6c_IonXpress_008_TSVC_variants.vcf
    #chr6    123703272       .       G       GT      100.0   .       AB=0.759;AC=2;AF=0.50;AN=2;BaseCounts=0,0,0,0;Bayesian_Score=100.00;DP=110;FS=0.000;HRun=7;HaplotypeScore=0.0000;IndelType=INS.NumRepetitions_7.EventLength_1.RepeatExpansion_T.;MQ=82.14;MQ0=0;QD=21.48;SB=-2504.77;Strand_Counts=ion_sample:1D,60,14,+TT,1,1,+T,11,22       GT:AD:DP:FA:GQ:MQ0:PL   1/1:74,33:110:0.300:0:0:2400,0,0
    #chr6    123703272       .       G       GT      100.0   .       AB=0.789;AC=2;AF=0.50;AN=2;BaseCounts=0,0,0,0;Bayesian_Score=100.00;DP=60;FS=0.000;HRun=7;HaplotypeScore=0.0000;IndelType=INS.NumRepetitions_7.EventLength_1.RepeatExpansion_T.;MQ=84.46;MQ0=0;QD=39.38;SB=-2504.77;Strand_Counts=ion_sample:1D,31,13,+T,5,11 GT:AD:DP:FA:GQ:MQ0:PL   1/1:44,16:60:0.267:0:0:2400,0,0


    #VCF files generated from Ion torrent platform Strasbourg using Lille files contains
    #
    #GT:GQ:GL:DP:FDP:AD:APSD:AST:ABQV 

    ##fileformat=VCFv4.1
    ##INFO=<ID=OID,Number=.,Type=String,Description="List of original Hotspot IDs">
    ##INFO=<ID=OPOS,Number=.,Type=Integer,Description="List of original allele positions">
    ##INFO=<ID=OREF,Number=.,Type=String,Description="List of original reference bases">
    ##INFO=<ID=OALT,Number=.,Type=String,Description="List of original variant bases">
    ##INFO=<ID=OMAPALT,Number=.,Type=String,Description="Maps OID,OPOS,OREF,OALT entries to specific ALT alleles">
    ##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count">
    ##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
    ##FORMAT=<ID=FAO,Number=A,Type=Integer,Description="Flow Evaluator Alternate allele observation count">
    ##FORMAT=<ID=FDP,Number=1,Type=Integer,Description="Flow Evaluator Read Depth">
    ##FORMAT=<ID=FRO,Number=1,Type=Integer,Description="Flow Evaluator Reference allele observation count">
    ##FORMAT=<ID=FSAF,Number=A,Type=Integer,Description="Flow Evaluator Alternate allele observations on the forward strand">
    ##FORMAT=<ID=FSAR,Number=A,Type=Integer,Description="Flow Evaluator Alternate allele observations on the reverse strand">
    ##FORMAT=<ID=FSRF,Number=1,Type=Integer,Description="Flow Evaluator reference observations on the forward strand">
    ##FORMAT=<ID=FSRR,Number=1,Type=Integer,Description="Flow Evaluator reference observations on the reverse strand">
    ##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype">
    ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
    ##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count">
    ##FORMAT=<ID=SAF,Number=A,Type=Integer,Description="Alternate allele observations on the forward strand">
    ##FORMAT=<ID=SAR,Number=A,Type=Integer,Description="Alternate allele observations on the reverse strand">
    ##FORMAT=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand">
    ##FORMAT=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand">
    ##INFO=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations">
    ##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus">
    ##INFO=<ID=FAO,Number=A,Type=Integer,Description="Flow Evaluator Alternate allele observations">
    ##INFO=<ID=FDP,Number=1,Type=Integer,Description="Flow Evaluator read depth at the locus">
    ##INFO=<ID=FR,Number=1,Type=String,Description="Reason why the variant was filtered.">
    ##INFO=<ID=FRO,Number=1,Type=Integer,Description="Flow Evaluator Reference allele observations">
    ##INFO=<ID=FSAF,Number=A,Type=Integer,Description="Flow Evaluator Alternate allele observations on the forward strand">
    ##INFO=<ID=FSAR,Number=A,Type=Integer,Description="Flow Evaluator Alternate allele observations on the reverse strand">
    ##INFO=<ID=FSRF,Number=1,Type=Integer,Description="Flow Evaluator Reference observations on the forward strand">
    ##INFO=<ID=FSRR,Number=1,Type=Integer,Description="Flow Evaluator Reference observations on the reverse strand">
    ##INFO=<ID=FWDB,Number=A,Type=Float,Description="Forward strand bias in prediction.">
    ##INFO=<ID=FXX,Number=1,Type=Float,Description="Flow Evaluator failed read ratio">
    ##INFO=<ID=HRUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome">
    ##INFO=<ID=HS,Number=0,Type=Flag,Description="Indicate it is at a hot spot">
    ##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length">
    ##INFO=<ID=MLLD,Number=A,Type=Float,Description="Mean log-likelihood delta per read.">
    ##INFO=<ID=NR,Number=1,Type=String,Description="Reason why the variant is a No-Call.">
    ##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">
    ##INFO=<ID=QD,Number=1,Type=Float,Description="QualityByDepth as 4*QUAL/FDP (analogous to GATK)">
    ##INFO=<ID=RBI,Number=A,Type=Float,Description="Distance of bias parameters from zero.">
    ##INFO=<ID=REFB,Number=A,Type=Float,Description="Reference Hypothesis bias in prediction.">
    ##INFO=<ID=REVB,Number=A,Type=Float,Description="Reverse strand bias in prediction.">
    ##INFO=<ID=RO,Number=1,Type=Integer,Description="Reference allele observations">
    ##INFO=<ID=SAF,Number=A,Type=Integer,Description="Alternate allele observations on the forward strand">
    ##INFO=<ID=SAR,Number=A,Type=Integer,Description="Alternate allele observations on the reverse strand">
    ##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand">
    ##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand">
    ##INFO=<ID=SSEN,Number=A,Type=Float,Description="Strand-specific-error prediction on negative strand.">
    ##INFO=<ID=SSEP,Number=A,Type=Float,Description="Strand-specific-error prediction on positive strand.">
    ##INFO=<ID=SSSB,Number=A,Type=Float,Description="Strand-specific strand bias for allele.">
    ##INFO=<ID=STB,Number=A,Type=Float,Description="Strand bias in variant relative to reference.">
    ##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex.">
    ##INFO=<ID=VARB,Number=A,Type=Float,Description="Variant Hypothesis bias in prediction.">
    ##LeftAlignVariants="analysis_type=LeftAlignVariants bypassFlowAlign=true kmer_len=19 min_var_count=5 short_suffix_match=5 min_indel_size=4 max_hp_length=8 min_var_freq=0.15 min_var_score=10.0 relative_strand_bias=0.8 output_mnv=0 sse_hp_size=0 sse_report_file= target_size=1.0 pref_kmer_max=3 pref_kmer_min=0 pref_delta_max=2 pref_delta_min=0 suff_kmer_max=3 suff_kmer_min=0 suff_delta_max=2 suff_delta_min=0 motif_min_ppv=0.2 generate_flow_position=0 input_file=[] read_buffer_size=null phone_home=STANDARD gatk_key=null read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/results/referenceLibrary/tmap-f3/hg19/hg19.fasta rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false BQSR=null defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 combined_sample_name= num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false variant=(RodBinding name=variant source=/results/analysis/output/Home/NER-1-reanalyze_007/plugin_out/variantCaller_out/small_variants.sorted.vcf) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub filter_mismatching_base_and_quals=false"
    ##contig=<ID=chr1,length=249250621,assembly=hg19>
    ##contig=<ID=chr10,length=135534747,assembly=hg19>
    ##contig=<ID=chr11,length=135006516,assembly=hg19>
    ##contig=<ID=chr12,length=133851895,assembly=hg19>
    ##contig=<ID=chr13,length=115169878,assembly=hg19>
    ##contig=<ID=chr14,length=107349540,assembly=hg19>
    ##contig=<ID=chr15,length=102531392,assembly=hg19>
    ##contig=<ID=chr16,length=90354753,assembly=hg19>
    ##contig=<ID=chr17,length=81195210,assembly=hg19>
    ##contig=<ID=chr18,length=78077248,assembly=hg19>
    ##contig=<ID=chr19,length=59128983,assembly=hg19>
    ##contig=<ID=chr2,length=243199373,assembly=hg19>
    ##contig=<ID=chr20,length=63025520,assembly=hg19>
    ##contig=<ID=chr21,length=48129895,assembly=hg19>
    ##contig=<ID=chr22,length=51304566,assembly=hg19>
    ##contig=<ID=chr3,length=198022430,assembly=hg19>
    ##contig=<ID=chr4,length=191154276,assembly=hg19>
    ##contig=<ID=chr5,length=180915260,assembly=hg19>
    ##contig=<ID=chr6,length=171115067,assembly=hg19>
    ##contig=<ID=chr7,length=159138663,assembly=hg19>
    ##contig=<ID=chr8,length=146364022,assembly=hg19>
    ##contig=<ID=chr9,length=141213431,assembly=hg19>
    ##contig=<ID=chrM,length=16569,assembly=hg19>
    ##contig=<ID=chrX,length=155270560,assembly=hg19>
    ##contig=<ID=chrY,length=59373566,assembly=hg19>
    ##fileDate=20131025
    ##phasing=none
    ##reference=/results/referenceLibrary/tmap-f3/hg19/hg19.fasta
    ##reference=file:///results/referenceLibrary/tmap-f3/hg19/hg19.fasta
    ##source=Torrent Unified Variant Caller (Extension of freeBayes)
    #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	ATE26
    #chr4	1369885	.	GC	G,AC	271.07	PASS	AO=19,17;DP=41;FAO=19,18;FDP=42;FR=.;FRO=5;FSAF=19,12;FSAR=0,6;FSRF=0;FSRR=5;FWDB=-0.153683,-0.00156175;FXX=0;HRUN=2,1;LEN=1,1;MLLD=24.6592,115.852;QD=25.8162;RBI=0.153685,0.0201718;REFB=-0.137714,-0.0539526;REVB=-0.000910218,0.0201112;RO=5;SAF=19,11;SAR=0,6;SRF=0;SRR=5;SSEN=0.324786,0;SSEP=0.269231,0;SSSB=0.865847,0.266476;STB=0.999164,0.646893;TYPE=del,snp;VARB=0.0853331,0.0412547;OID=.,.;OPOS=1369886,1369885;OREF=C,G;OALT=-,A;OMAPALT=G,AC	GT:GQ:DP:FDP:RO:FRO:AO:FAO:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR	1/2:82:41:42:5:5:19,17:19,18:0,6:19,11:0:5:0,6:19,12:0:5
    
    #2011 VCF files generated from the IGBMC files contains are version 4.0 compliant (http://www.1000genomes.org/node/101)
    #
    #GT:GQ:DP:HQ:NR
    
    #2013/07/27 VCF files generated by the IGBMC sequencing platform are version 4.1 compliant (http://www.1000genomes.org/node/101)
    #
    # WARNING indels are now called including the reference base in the mutation
    #
    #INFO=<ID=ABHet,Number=1,Type=Float,Description="Allele Balance for hets (ref/(ref+alt))">
    #INFO=<ID=ABHom,Number=1,Type=Float,Description="Allele Balance for homs (A/(A+O))">
    #INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
    #INFO=<ID=AC1,Number=1,Type=Float,Description="Max-likelihood estimate of the first ALT allele count (no HWE assumption)">
    #INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
    #INFO=<ID=AF1,Number=1,Type=Float,Description="Max-likelihood estimate of the first ALT allele frequency (assuming HWE)">
    #INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
    #INFO=<ID=BaseCounts,Number=4,Type=Integer,Description="Counts of each base">
    #INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
    #INFO=<ID=CGT,Number=1,Type=String,Description="The most probable constrained genotype configuration in the trio">
    #INFO=<ID=CLR,Number=1,Type=Integer,Description="Log ratio of genotype likelihoods with and without the constraint">
    #INFO=<ID=ClippingRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases">
    #INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership">
    #INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
    #INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
    #INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
    #INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">
    #INFO=<ID=FQ,Number=1,Type=Float,Description="Phred probability of all samples being the same">
    #INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
    #INFO=<ID=FlanquingSeq,Number=1,Type=String,Description="Flanquing 40 nucleotide sequences around variant.">
    #INFO=<ID=G3,Number=3,Type=Float,Description="ML estimate of genotype frequencies">
    #INFO=<ID=GC,Number=1,Type=Integer,Description="GC content around the variant (see docs for window size details)">
    #INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest Contiguous Homopolymer Run of Variant Allele In Either Direction">
    #INFO=<ID=HW,Number=1,Type=Float,Description="Phred-scaled p-value for Hardy-Weinberg violation">
    #INFO=<ID=HWE,Number=1,Type=Float,Description="Chi^2 based HWE test P-value based on G3">
    #INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">
    #INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
    #INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
    #INFO=<ID=LowMQ,Number=3,Type=Float,Description="3-tuple: <fraction of reads with MQ=0>,<fraction of reads with MQ<=10>,<total number of reads>">
    #INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed">
    #INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed">
    #INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
    #INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">
    #INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
    #INFO=<ID=OND,Number=1,Type=Float,Description="Overall non-diploid ratio (alleles/(alleles+non-alleles))">
    #INFO=<ID=PC2,Number=2,Type=Integer,Description="Phred probability of the nonRef allele frequency in group1 samples being larger (,smaller) than in group2.">
    #INFO=<ID=PCHI2,Number=1,Type=Float,Description="Posterior weighted chi^2 P-value for testing the association between group1 and group2 samples.">
    #INFO=<ID=PR,Number=1,Type=Integer,Description="# permutations yielding a smaller PCHI2.">
    #INFO=<ID=PV4,Number=4,Type=Float,Description="P-values for strand bias, baseQ bias, mapQ bias and tail distance bias">
    #INFO=<ID=PercentNBaseSolid,Number=1,Type=Float,Description="Percentage of N bases in the pileup (counting only SOLiD reads)">
    #INFO=<ID=QCHI2,Number=1,Type=Integer,Description="Phred scaled PCHI2.">
    #INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
    #INFO=<ID=RPA,Number=.,Type=Integer,Description="Number of times tandem repeat unit is repeated, for each allele (including reference)">
    #INFO=<ID=RU,Number=1,Type=String,Description="Tandem repeat unit (bases)">
    #INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
    #INFO=<ID=STR,Number=0,Type=Flag,Description="Variant is a short tandem repeat">
    #INFO=<ID=Samples,Number=.,Type=String,Description="List of polymorphic samples">
    #INFO=<ID=TDT,Number=A,Type=Float,Description="Test statistic from Wittkowski transmission disequilibrium test.">
    #INFO=<ID=UGT,Number=1,Type=String,Description="The most probable unconstrained genotype configuration in the trio">
    #INFO=<ID=VDB,Number=1,Type=Float,Description="Variant Distance Bias">
    #INFO=<ID=VariantType,Number=1,Type=String,Description="Variant type description">
    #INFO=<ID=multiple.Samples,Number=.,Type=String,Description="List of polymorphic samples">
    #INFO=<ID=multiple.variantCaller_M,Number=1,Type=String,Description="Source VCF for the merged record in CombineVariants">
    #INFO=<ID=set,Number=1,Type=String,Description="Source VCF for the merged record in CombineVariants">
    #INFO=<ID=single.Samples,Number=.,Type=String,Description="List of polymorphic samples">
    #INFO=<ID=single.variantCaller_S,Number=1,Type=String,Description="Source VCF for the merged record in CombineVariants">
    #INFO=<ID=variantCaller_M,Number=1,Type=String,Description="Source VCF for the merged record in CombineVariants">
    #INFO=<ID=variantCaller_S,Number=1,Type=String,Description="Source VCF for the merged record in CombineVariants">

    #GT:AB:AD:DP:GQ:MQ0:PL 
    #
    #FORMAT=<ID=AB,Number=1,Type=Float,Description="Allele balance for each het genotype">
    #FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
    #FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
    #FORMAT=<ID=GL,Number=3,Type=Float,Description="Likelihoods for RR,RA,AA genotypes (R=ref,A=alt)">
    #FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
    #FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
    #FORMAT=<ID=MQ0,Number=1,Type=Integer,Description="Number of Mapping Quality Zero Reads per sample">
    #FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
    #FORMAT=<ID=SP,Number=1,Type=Integer,Description="Phred-scaled strand bias P-value">
    #CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  ASJ-30_6
    #chr1    47726087        rs13376679      T       C       999     PASS    ABHet=0.599;ABHom=1.00;AC=1;AC1=2;AF=0.500;AF1=0.25;AN=2;BaseCounts=0,192,0,287;BaseQRankSum=17.878;ClippingRankSum=-0.662;DB;DP=479;DP4=400,410,129,154;Dels=0.00;FQ=999;FS=10.340;FlanquingSeq=TGTCCACCAGTCTTGAATGA[T/C]GTGTTTTTTTCTTTGTATGG;GC=37.62;HRun=0;HaplotypeScore=3.9769;LowMQ=0.0000,0.0000,479;MLEAC=2;MLEAF=0.250;MQ=59.25;MQ0=0;MQRankSum=-0.148;PV4=0.3,1,1,1;PercentNBaseSolid=0.0000;QD=2.09;ReadPosRankSum=4.154;Samples=APN-52_3,APN-66_13;VDB=0.0446;VariantType=SNP;multiple.Samples=APN-52_3,APN-66_13;multiple.variantCaller_M=Mpileup,Haplotype,Unified;set=Intersection;single.Samples=APN-52_3;single.variantCaller_S=Mpileup,Pileup,Unified,Haplotype;variantCaller_M=Mpileup,Haplotype,Unified    GT:AB:AD:DP:GQ:MQ0:PL   0/1:0.600:287,192:463:99:0:212,0,144

    set g_allPatients {}
    set patientsDir   $g_VaRank(vcfDir)
    
    set nbPatients_Total   0
    set nbVariations_Total 0
    set nbFiles 0

    set L_NewHeaders_VCF {}
    set g_vcfINFOS(L_IDs) {}
		    
    foreach vcfFile [glob -nocomplain $patientsDir/*.vcf] {

	incr nbFiles

	set DP_once 1
	set NR_once 1

	set l_patients {}

	puts "...parsing the VCF file ($vcfFile) ([clock format [clock seconds] -format "%B %d %Y - %H:%M"])"
	
	set FirstTime 0

	set NbDPSaved    0
	set NbDPNotFound 0
	set NbDPEmpty    0
	set NbADSaved    0 
	set NbADNotFound 0
	set NbADEmpty    0

	foreach L [LinesFromFile $vcfFile] {
	    if {[regexp "^##" $L]} {continue}
	    if {[regexp "^#CHROM" $L]} {
		if {$FirstTime} {puts "WARNING: [file tail $vcfFile] seems to contain multiple headers";continue}

		set L [split $L "\t"]
		set i_chr    [lsearch -exact $L "#CHROM" ]; if {$i_chr    == -1} {puts "Bad header line syntax. #CHROM column not found - Exit"; exit}
		set i_pos    [lsearch -exact $L "POS"];     if {$i_pos    == -1} {puts "Bad header line syntax. POS column not found - Exit"; exit}
		set i_ref    [lsearch -exact $L "REF"];     if {$i_ref    == -1} {puts "Bad header line syntax. REF column not found - Exit"; exit}
		set i_alt    [lsearch -exact $L "ALT"];     if {$i_alt    == -1} {puts "Bad header line syntax. ALT column not found - Exit"; exit}
		set i_id     [lsearch -exact $L "ID"];      if {$i_id     == -1} {puts "Bad header line syntax. ID column not found - Exit"; exit}
		set i_qual   [lsearch -exact $L "QUAL"];    if {$i_qual   == -1} {puts "Bad header line syntax. QUAL column not found - Exit"; exit}
		set i_valid  [lsearch -exact $L "VALID"]
		set i_filter [lsearch -exact $L "FILTER"];  if {$i_filter == -1} {puts "Bad header line syntax. FILTER column not found - Exit"; exit}
		set i_info   [lsearch -exact $L "INFO"];    if {$i_info   == -1} {puts "Bad header line syntax. INFO column not found - Exit"; exit}
		set i_format [lsearch -exact $L "FORMAT"];  if {$i_format == -1} {puts "FORMAT column not found in the header line - Exit"; exit}

		#We need to have a global list for multiple files and multiple patients per file

		set nbPatients_File   0
		set nbVariations_File 0

		for {set i [expr {$i_format+1}]} {$i < [llength $L]} {incr i} {
		    set patient [lindex $L $i]
		    
		    incr nbPatients_File 
		    #puts $patient
		    
		    if {[lsearch -exact $g_allPatients $patient] != -1} {
			puts "\tWARNING: $patient seems to be present in different VCF files"
			lappend l_patients    $patient
		    } else {
			lappend g_allPatients $patient
			lappend l_patients    $patient
		    }

		    set i_$patient $i
		}
		set FirstTime 1

		continue
	    }

	    set L [split $L "\t"]

	    ## If there is only normal hom in the line (GT = 0|0 or 0/0), so there isn't mutation to analyse by VaRank
	    if {![regexp "1/0|0/1|2/0|0/2|1/1|2/2|2/1|1/2" $L] && ![regexp "1\\\|0|0\\\|1|2\\\|0|0\\\|2|1\\\|1|2\\\|2|2\\\|1|1\\\|2" $L]} {continue}

	    regsub -all " " [lindex $L $i_chr] "" chrom
	    regsub -all "^chr" $chrom "" chrom
	    
	    regsub -all " " [lindex $L $i_pos] "" pos
	    regsub -all " " [lindex $L $i_ref] "" ref
	    regsub -all " " [lindex $L $i_alt] "" alt
	    regsub -all " " [lindex $L $i_id]  "" rs

	    if {[set g_VaRank(rsFromVCF)]=="yes"} {
		if {[isNotAnRS $rs]} {
		    set rs "NA"; set valid "NA"
		} else {
		    if {$i_valid == -1} {
			set valid "NA"
		    } else {
			regsub -all " " [lindex $L $i_valid] "" valid
		    }
		}
	    } else {
		set rs "NA"; set valid "NA"
	    }

	    regsub -all " " [lindex $L $i_filter] "" filter
	    regsub -all " " [lindex $L $i_info]   "" info
	    regsub -all {\"} $info "" info

	    regsub -all " " [lindex $L $i_format] "" format
	    regsub -all " " [lindex $L $i_qual]   "" qual

	    # ALT colum
	    ###########
	    # ex: ALT = "A"
	    # ex: ALT = "A,AAC,AACACAC,AACACACACACACAC"

	    #Analysing the FORMAT COLUMN
	    ############################
	    set l_format [split $format ":"]

	    # GT: Genotype 
	    ##############
	    # 0 is for reference allele.
	    # 1 is for the first mutated allele of the ALT column
	    # 2 is for the second mutated allele of the ALT column
	    # 3 is for the third mutated allele of the ALT column
	    # ...etc
	    # GT value: "0|0 = hmz reference", "1|0 or 0|1 or 2|0 or 0|2 = htz", "1|1 or 2|2 = mutated hmz", "2|1 or 1|2 = double htz".
	    # In case with GT = 1|2 or 2|1: represents for VaRank 2 different ID (2 htz). Ok, will be see if deleterious in the compound htz.
	    #
	    set  j_gt [lsearch -exact $l_format "GT"]
	    if {$j_gt == -1} {puts "\tWARNING: GT (genotype) absent at least once from the FORMAT column - Exit"; exit}

	    # DP: Read Depth
	    ################
	    # The field describes the total depth of reads that passed the caller's internal quality control metrics 
	    #
	    set  j_dp [lsearch -exact $l_format "DP"]
	    if {$j_dp == -1} {
		if {$DP_once} {
		    puts "\tWARNING: DP (total read depth) absent at least once from the FORMAT column - continue"
		    if {[info exists g_VaRank(DEBUG)]} {puts "FORMAT:$format";puts $L}
		    set DP_once 0
		}
	    }

	    # NR: Read Number
	    #################
	    # Alternatively, AD can be used to get the number of reads for the variant
	    # Alternatively, AO can be used to get the number of reads for the variant
	    #
	    set  j_nr [lsearch -exact $l_format "NR"]
	    if {$j_nr == -1} {
		set  j_ad [lsearch -exact $l_format "AD"]
		if {$j_ad == -1} {
		    #USE AO for allele read depth
		    set  j_ao [lsearch -exact $l_format "AO"]
		    if {$j_ao == -1} {
			if {$NR_once} {
			    puts "\tWARNING: AO, AD and NR (read depth) absent at least once from the FORMAT column - continue";set NR_once 0
			    if {[info exists g_VaRank(DEBUG)]} {puts "FORMAT:$format"}
			}
		    } else {
			if {$NR_once} {
			    if {[info exists g_VaRank(DEBUG)]} {puts "\tWARNING: AD and NR (read depth) absent at least once from the FORMAT column - continue"}
			    set NR_once 0
			    #if {[info exists g_VaRank(DEBUG)]} {puts "FORMAT:$format"}
			}
		    }
		} else {
		    if {$NR_once} {
			if {[info exists g_VaRank(DEBUG)]} {puts "\tWARNING: NR (read depth) absent at least once from the FORMAT column - continue"}
			set NR_once 0
			if {[info exists g_VaRank(DEBUG)]} {puts "FORMAT:$format"}
		    }
		}
	    }

	    if {[info exists g_VaRank(DEBUG)]} {puts $L}

	    #Variant/allele position in case we check and have multiple allele to get the correct data
	    #The counting includes the fact that position at index 0 is the reference position
	    set k 0
	    foreach altn [split $alt ","] {
		if {$altn == ""} {continue}

		incr k
		incr nbVariations_File

		#We need to have temporary copies of pos and ref
		set pos_tmp $pos
		set ref_tmp $ref

		#VCF describing variation
		#
		#SNP VCF record
		#20     3 .         C      T    .   PASS  DP=100
		#
		#Insertion VCF record
		#20     3 .         C      CTAG    .   PASS  DP=100
		#This is a insertion since the reference base C is being replaced by C [the reference base] plus three insertion bases TAG. Again there are only two alleles so I have the two following segregating haplotypes:
		
		#Deletion VCF record
		#20     2 .         TCG      T    .   PASS  DP=100
		#This is a deletion of two reference bases since the reference allele TCG is being replaced by just the T [the reference base]. Again there are only two alleles so I have the two following segregating haplotypes:
				
		#Mixed VCF record for a microsatellite
		#20     2 .         TCGCG      TCG,TCGCGCG    .   PASS  DP=100
		#This is a mixed type record containing a 2 base insertion and a 2 base deletion. There are three segregating alleles so I have the three following haplotypes:
		#Ref: a t c g c g - - a // C is the reference base
		#Ref: a t c g - - - - a // following the C base is a deletion  of 2 bases
		#Ref: a t c g c g c g a // following the C base is a insertion of 2 bases
		
		#Length test: if altn is longer than ref, it is an insertion
		#Length test: if altn is shorter than ref, it is a deletion

		if {![regexp "\\-" $ref_tmp] && ![regexp "\\-" $altn]} {
		    set ref_length  [string length $ref_tmp]
		    set altn_length [string length $altn]
		    
		    if {$ref_length>$altn_length} {
			#Here we have a deletion
			if {[string range $ref_tmp 0 [expr {$altn_length-1}]] eq $altn} {
			    set ref_tmp [string range $ref_tmp $altn_length end]
			    set altn    "-"
			    incr pos_tmp
			} else {
			    if {[info exists g_VaRank(DEBUG)]} {puts "WARNING: could not solve this deletion chr$chrom at position $pos $ref>$altn, continue as it is."}
			} 				    
			#if {[catch {regsub "^[set altn]" $ref_tmp "" ref_tmp} Message]} {}
		    } elseif {$ref_length<$altn_length} {
			#Here we have a insertion
			if {[string range $altn 0 [expr {$ref_length-1}]] eq $ref_tmp} {
			    set altn    [string range $altn $ref_length end]
			    set ref_tmp "-"
			} else {
			    if {[info exists g_VaRank(DEBUG)]} {puts "WARNING: could not solve this insertion chr$chrom at position $pos $ref>$altn, continue as it is."}
			} 				    
			
			#if {[catch {regsub "^[set ref_tmp]" $altn "" altn} Message]} {puts $Message;puts "Skipping $chrom $pos $ref, too big";continue}
		    }
		}

		set ID   [AssignID $chrom $pos_tmp $ref_tmp $altn]
		set sID  [split $ID "_"]

		set pos_tmp  [lindex $sID 1]
		set ref_tmp  [lindex $sID 2]
		set altn     [lindex $sID 3]

		if {![info exists g_vcfINFOS($ID)]} {
		    set g_vcfINFOS($ID) "$chrom $pos_tmp $ref_tmp $altn $rs $valid"
		    lappend g_vcfINFOS(L_IDs) $ID
		}

		#Analysing the INFO COLUMN
		#
		#We need to extract the INFO column and additional information from there.
		#Storing is done here but extraction is done while ranking to ensure the big picture
		#
		#To integrate the info column from the VCF we need to generate a list of cumulated headers from all patients and all input files
		#
		set l_duoInfos_VCF ""
		set   DPINFO    ""
		set i_DPINFO    ""

		if {$info!="" && $info!="." && $info!="NA"} {
		    set l_duoInfos_VCF [split $info ";"]

		    #No DP information from the FORMAT column so we try to rescue somehow
		    if {$j_dp == -1} {
			set  i_DPINFO [lsearch -regexp $l_duoInfos_VCF "^DP="]
			if {$i_DPINFO!="-1"} {
			    set DPINFO [split [lindex $l_duoInfos_VCF $i_DPINFO] "="]
			    set DPINFO [lindex $DPINFO 1]
			}
		    }
		}

		#save the different headers from INFO column (L_NewHeaders_VCF)
		if {[set g_VaRank(vcfInfo)]=="yes"} {
		    if {$info!="" && $info!="." && $info!="NA"} {
			#set l_duoInfos_VCF [split $info ";"]
			
			foreach duoInfos_VCF $l_duoInfos_VCF {
			    #Info is usually Header=Data
			    #Some field could be only data and no header, in that case we will get data and set data as header too
			    set duo_Header_Infos_VCF [split $duoInfos_VCF "="]
			    set Header [lindex $duo_Header_Infos_VCF 0]
			    
			    if {[llength $duo_Header_Infos_VCF]>2} {puts ">>>>>>>>>$ID $duo_Header_Infos ----- $Header"}
		    
			    if {$L_NewHeaders_VCF != {} && [lsearch -exact $L_NewHeaders_VCF $Header]!="-1"} {continue}	    
			    #Here we could filter out some Headers
			    #
			    lappend L_NewHeaders_VCF $Header
			}
		    }
		}

		foreach patient $l_patients {

		    if {![info exists i_$patient]} {continue}; # In case of several VCF files in input 

		    regsub -all " " [lindex $L [set i_$patient]] "" value
 
		    #Empty data are represented by ./. and filtered out
		    if {$value == "./."} {continue}

		    #List of values
		    set lvalue [split $value ":"]

		    #Genotype for the patient
		    set gt [lindex $lvalue $j_gt]

		    #Wild type variation (actually not variation) are filtered out
		    ## for some vcf: GT = "./." for some sample (not covered at this position)
		    if {[regexp "0/0" $gt] || [regexp "0\\\|0" $gt] || [regexp "\\./\\." $gt] || [regexp "\\.\\\|\\." $gt]} {continue}

		    #We need to extract the INFO column and additional information from there and this for each patient individually
		    #Storing is done here but extraction is done while ranking to ensure the big picture
		    #Analysing the INFO COLUMN
		    #
		    if {[set g_VaRank(vcfInfo)]=="yes"} {
			if {![info exists g_vcfINFOS_Supp($ID,$patient)]} {set g_vcfINFOS_Supp($ID,$patient) [split $info ";"]}
		    }
		    
		    if {$j_dp != -1} {
			#puts " [lindex $lvalue $j_dp]<dp"			
			regsub "\\.\[0-9\]+$" [lindex $lvalue $j_dp] "" dp
			#puts "$dp<dp"
			regsub ",\[0-9\]+$" $dp "" dp
			#puts "$dp<dp"
			if {$dp=="0"} {incr NbDPEmpty} else {incr NbDPSaved}
		    } elseif {$DPINFO != "" && [llength $l_patients]<=1} {
			set dp $DPINFO
			if {$dp=="0"} {incr NbDPEmpty} else {incr NbDPSaved}
		    } else {
			set dp "NA"
			incr NbDPNotFound
		    }

		    if {$j_nr != -1} {
			#puts "[lindex $lvalue $j_nr]<nr"
			regsub "\\.\[0-9\]+$" [lindex $lvalue $j_nr] "" nr
			#puts "$nr<nr"
			regsub ",\[0-9\]+$" $nr "" nr
			#puts "$nr<nr"
			if {$nr=="0"} {incr NbADEmpty} else {incr NbADSaved}
		    } elseif {$j_ad != -1} {
			#In case of multiple variants this should be retrieved by the indexed k
			set nr [lindex $lvalue $j_ad]
			set nr [lindex [split $nr ","] $k]
			if {$nr=="0"} {incr NbADEmpty} else {incr NbADSaved}
		    } elseif {$j_ao != -1} {
			#In case of multiple variants this should be retrieved by the indexed k
			#Apparently only the mutant alleles are reported in AO, whereas the k index counts the reference at position 0, hence k-1
			set nr [lindex $lvalue $j_ao]
			set nr [lindex [split $nr ","] [expr {$k-1}]]
			
			#set nr [lindex [split $nr ","] $k]
			if {$nr=="0"} {incr NbADEmpty} else {incr NbADSaved}
		    } else {
			set nr "NA"
			incr NbADNotFound

		    }

		    #Getting the genotype from the variants.
		    #
		    #There are multiple ways to get the information: from the GT field, from the INFO column and various fields in there, or by recalculating from depth and read depth
		    #
		    set StatutHomHet "NA"

		    #puts "$gt - $dp - $nr"
		    set n  [regsub -all "$k" $gt "" tutu]
		    #puts "$gt - $n"
		    if {[set g_VaRank(Homstatus)]=="yes"} {
			#Recompute the ratio for homozygosity/heterozygosity detection
			#
			#Ratio for homozygosity
			set Ratio    0
			set RatioHom [set g_VaRank(Homcutoff)]

			if {($nr !="." && $nr != "NA" && $nr != "0") && ($dp !="." && $dp != "NA" && $dp != "0")} {
			    if {[info exists g_VaRank(DEBUG)]} {puts "NR:$nr, DP:$dp"}

			    set Ratio [format "%.0f" [expr {$nr*100.0/$dp}]]

			    if {[info exists g_VaRank(DEBUG)]} {puts "$nr $dp gives a ratio $Ratio"}

			    if {$Ratio>=$RatioHom} {set StatutHomHet "hom"} else {set StatutHomHet "het"}
			} else {
			    #If no ratio can be computed (eg, BGI files with no coverage), we keep the old way
			    if {$n == 1} {set StatutHomHet "het"} elseif {$n == 2} {set StatutHomHet "hom"}
			}
		    } else {
			#Get the homozygosity/heterozygosity detection from the files
			if {$n == 1} {
			    set StatutHomHet "het"
			} elseif {$n == 2} {
			    set StatutHomHet "hom"
			} else {
			    #In case the ratio is not given properly (bug observed in some tools), recompute the ratio for homozygosity/heterozygosity detection
			    #Many programs have trouble giving GT calls for multiple alleles in VCF
			    #
			    set Ratio    0
			    set RatioHom [set g_VaRank(Homcutoff)]
			    
			    if {($nr !="." && $nr != "NA" && $nr != "0") && ($dp !="." && $dp != "NA" && $dp != "0")} {

				if {[info exists g_VaRank(DEBUG)]} {puts "NR:$nr, DP:$dp"}
				
				set Ratio [format "%.0f" [expr {$nr*100.0/$dp}]]
				
				if {[info exists g_VaRank(DEBUG)]} {puts "$nr $dp gives a ratio $Ratio"}
				
				if {$Ratio>=$RatioHom} {set StatutHomHet "hom?"} else {set StatutHomHet "het?"}
			    }
			}
		    }
		    #Now that we have defined that NA is by default for zygosity status, one has to selectively not include variants in other patients when no coverage is available
		    if {$StatutHomHet == "NA" && ($nr =="." || $nr == "NA" || $nr == "0") && ($dp =="." || $dp == "NA" || $dp == "0")} {continue}
		    #lappend g_vcfINFOS($ID) "$patient:$StatutHomHet:$dp:$nr"
		    lappend g_vcfINFOS($ID) "$patient:$StatutHomHet:$dp:$nr:$qual"
		    set     g_vcfINFOS($ID,$patient) 1
		}
	    }
	}
	incr   nbPatients_Total $nbPatients_File
	incr nbVariations_Total $nbVariations_File

	puts "\tFile loaded: $nbVariations_File variation(s) and $nbPatients_File sample(s) ([clock format [clock seconds] -format "%B %d %Y - %H:%M"])."
	puts "\tFile loaded: Total Read Depth:  $NbDPSaved variant(s) are different to 0, $NbDPEmpty variant(s) are equal to 0, $NbDPNotFound are empty"
	puts "\tFile loaded: Allele Read Depth: $NbADSaved variant(s) are different to 0, $NbADEmpty variant(s) are equal to 0, $NbADNotFound are empty"
    }

    if {[set g_VaRank(vcfInfo)]=="yes"} {
	set g_vcfINFOS_Supp(Header) [lsort -dictionary $L_NewHeaders_VCF]
    }

    puts "...VCF file(s) loaded: $nbFiles file(s) for $nbVariations_Total variation(s) in $nbPatients_Total sample(s) ([clock format [clock seconds] -format "%B %d %Y - %H:%M"])"
    
    #foreach L [array get g_vcfINFOS] {puts $L}

    ## Check that all the patients given by family in the config file are present in the VCF file.
    if {[info exists g_lPatientsOf]} {
	foreach fam [array names g_lPatientsOf] {

	    set lBadPatients  {}
	    set lPatients_Tmp {}

	    foreach patient $g_lPatientsOf($fam) {
		if {[lsearch -exact $g_allPatients $patient] == -1} {lappend lBadPatients $patient} else {lappend lPatients_Tmp $patient}
	    }
	    
	    if {$lBadPatients != {}} {
		puts "\t...patient names \"[join $lBadPatients " "]\" (given in config file for family study) absent from the VCF file. Updating $fam patient list ($lPatients_Tmp)."
		#puts "\t...Families not taken into account in this study"
			
		set g_lPatientsOf($fam) $lPatients_Tmp
	    }
	}
    }

    ## Patients without an assigned family in the config file are given a new family here.
    ## If no family has been defined in the config file, we assign a family to each patient of the VCF files.
    set lPasTrouve {}
    if {[info exists g_lPatientsOf]} {
	foreach patient $g_allPatients {
	    set pastrouve 1

	    foreach fam [array names g_lPatientsOf] {
		if {[lsearch -exact $g_lPatientsOf($fam) $patient] != -1} {set pastrouve 0}
	    }
	    if {$pastrouve} {lappend lPasTrouve $patient}
	}
    } else {
	foreach patient $g_allPatients {
	    lappend lPasTrouve $patient
	}
    }

    set lPasTrouve [lsort -dictionary $lPasTrouve]

    set i 1
    foreach patient $lPasTrouve {
	while {[info exists g_lPatientsOf(fam$i)]} {incr i}
	set g_lPatientsOf(fam$i) $patient
    }

    ## We re-sort the list of patients in g_allPatients to match those in g_lPatientsOf
    ## -> Used to write header files of ranking (barcodes lines)
    unset g_allPatients
    set   g_allPatients {}
    foreach fam [array names g_lPatientsOf] {
	foreach patient $g_lPatientsOf($fam) {
	    lappend g_allPatients $patient
	}
    }

    return
}

