##########################################################################
# VaRank 1.0                                                             #
#                                                                        #
# VaRank: a simple and powerful tool for ranking genetic variants        #
#                                                                        #
# Copyright (C) 2014 Veronique Geoffroy (veronique.geoffroy@inserm.fr)   # 
#                    Jean Muller (jeanmuller@unistra.fr)                 # 
#                                                                        #
# Please cite the following article:                                     #
#    XXX                                                                 #
#                                                                        #
# This is part of VaRank source code.                                    #
#                                                                        #
# This program is free software; you can redistribute it and/or          #
# modify it under the terms of the GNU General Public License            # 
# as published by the Free Software Foundation; either version 3         # 
# of the License, or (at your option) any later version.                 #
#                                                                        #
# This program is distributed in the hope that it will be useful,        # 
# but WITHOUT ANY WARRANTY; without even the implied warranty of         #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
# GNU General Public License for more details.                           #
#                                                                        #
# You should have received a copy of the GNU General Public License      #
# along with this program; If not, see <http://www.gnu.org/licenses/>.   #
##########################################################################

# writeAllStatistics --
#
#   Writes one statistics file per patient and one global statistics file
#   into the directory $g_VaRank(vcfDir).
#
#   Per patient (<vcfDir>/<family>_<patient>_statistics.tsv):
#     a tab-separated table with one row per coding effect / variant
#     location, holding g_Statistics(<patient>,<effect>) and its Hom, Het
#     and Null breakdown, followed by a line with g_Statistics(<patient>).
#     Patients without a g_Statistics(<patient>) entry are reported on
#     stdout and skipped.
#
#   Global (<vcfDir>/SNV_global_statistics.tsv):
#     for every category that has a g_Statistics(All,<effect>) entry, the
#     global count plus the mean and standard deviation (BasicStatistics)
#     of the per-patient counts collected above. Mean and SD are "NA"
#     unless at least two patients contributed a value.
#
# Globals read:
#   g_VaRank(vcfDir), g_lPatientsOf(<family>), g_Statistics(...)
#
# Results:
#   Returns an empty string. Existing output files are deleted before
#   being rewritten.
proc writeAllStatistics {} {

    global g_VaRank
    global g_lPatientsOf
    global g_Statistics

    puts "...writing patients and global statistics ([clock format [clock seconds] -format "%B %d %Y - %H:%M"])"

    set L_codingEffect [list synonymous missense nonsense In-frame Frameshift startloss stoploss unknown]
    set L_varLocation  [list intron upstream "5'UTR" "3'UTR" downstream "splice site"]
    set L_categories   [concat $L_codingEffect $L_varLocation]

    # countsOf(<category>) accumulates one value per patient; it feeds the
    # mean/SD columns of the global file.
    foreach effect [concat $L_categories Total] {set countsOf($effect) {}}

    foreach fam [array names g_lPatientsOf] {
        foreach patient $g_lPatientsOf($fam) {

            if {![info exists g_Statistics($patient)]} {
                puts "\t...$patient statistics could not be written, rerun while removing all the patient outputdata."
                continue
            }

            set     Line {}
            lappend Line [join [list What Total Homozygous Heterozygous Null] "\t"]
            foreach effect $L_categories {
                lappend countsOf($effect) $g_Statistics($patient,$effect)

                lappend Line [join [list $effect \
                                        $g_Statistics($patient,$effect) \
                                        $g_Statistics($patient,$effect,Hom) \
                                        $g_Statistics($patient,$effect,Het) \
                                        $g_Statistics($patient,$effect,Null)] "\t"]
            }
            lappend Line "Total:  $g_Statistics($patient) variations."

            lappend countsOf(Total) $g_Statistics($patient)

            set outputfile "$g_VaRank(vcfDir)/[set fam]_[set patient]_statistics.tsv"
            if {[file exists $outputfile]} {file delete -force $outputfile}
            WriteTextInFile [join $Line "\n"] $outputfile
        }
    }

    set     Line {}
    lappend Line "Total statistics (non redundant):"
    lappend Line [join [list What "Total" Mean SD] "\t"]
    foreach effect $L_categories {

        if {![info exists g_Statistics(All,$effect)]} {continue}

        set Mean NA
        set SD   NA

        # The comparison must stay OUTSIDE the [llength ...] substitution:
        # "[llength $list>1]" measures the text "<list>>1" and is true for
        # any list, which defeated the "more than one value" guard.
        if {[llength $countsOf($effect)] > 1} {
            set MVSD [BasicStatistics $countsOf($effect)]
            set Mean [format "%.0f" [lindex $MVSD 0]]
            set SD   [format "%.0f" [lindex $MVSD 2]]
        }
        lappend Line [join [list $effect $g_Statistics(All,$effect) $Mean $SD] "\t"]
    }

    set Mean NA
    set SD   NA

    # Same guard as above for the per-patient totals.
    if {[llength $countsOf(Total)] > 1} {
        set MVSD [BasicStatistics $countsOf(Total)]
        set Mean [format "%.0f" [lindex $MVSD 0]]
        set SD   [format "%.0f" [lindex $MVSD 2]]
    }
    if {[info exists g_Statistics(All,byeffect)]} {
        lappend Line [join [list Total $g_Statistics(All,byeffect) $Mean $SD] "\t"]
    }

    #To do a graph
    #lappend Line "NbAll variations: $countsOf(Total)"

    set outputfile "$g_VaRank(vcfDir)/SNV_global_statistics.tsv"

    if {[file exists $outputfile]} {file delete -force $outputfile}

    # Only the two header lines present means no category had global data.
    if {[llength $Line]>2} {
        WriteTextInFile [join $Line "\n"] $outputfile
    } else {
        puts "\t...Global statistics could not be written, rerun with removing all outputdata."
    }
    return
}

# BasicStatistics --
#
#   Computes the mean, the variance and the standard deviation of a list
#   of numbers. The variance is obtained as
#       (mean of the squared values) - (mean of the values)^2
#   with both means taken over the number of elements in the list.
#
# Arguments:
#   Liste   list of numeric values
#
# Results:
#   A three-element list {mean variance standardDeviation}. The standard
#   deviation is 0.0 whenever the variance is not strictly positive.
proc BasicStatistics {Liste} {

    set count      [llength $Liste]
    set sumValues  "0.0"
    set sumSquares "0.0"

    # Both accumulators start as floating-point values so every later
    # operation (including the divisions below) is done in floating point.
    foreach value $Liste {
        set sumValues  [expr {$sumValues + $value}]
        set sumSquares [expr {$sumSquares + pow($value,2)}]
    }

    set mean        [expr {$sumValues/$count}]
    set meanSquares [expr {$sumSquares/$count}]
    set variance    [expr {$meanSquares - pow($mean,2)}]

    # sqrt is only evaluated for a strictly positive variance.
    set stdDev [expr {$variance > 0.0 ? sqrt($variance) : 0.0}]

    return [list $mean $variance $stdDev]
}


