# Index by: file name | procedure name | procedure call | annotation
# gscope_blastomics.tcl (annotations | original source)

#rR gscope_blastomics.tcl

proc NicoMapping {{Qui ""} {Quoi ""}} {
    # Cached cross-reference lookup backed by a semicolon-separated file.
    #   Qui  : any value found in one of the four columns of the file
    #   Quoi : name of the wanted column (Id, Acc, IdT or IdG)
    # Returns the value stored for (Qui,Quoi), or "" when nothing is known.
    # "NicoMapping ListOf <column>" returns all values read for that column.
    # The file is read once, on the first call that misses the cache.
    global NicoMapping

    if {[info exists NicoMapping($Qui,$Quoi)]} {
        return $NicoMapping($Qui,$Quoi)
    }
    if {[info exists NicoMapping("EstCharge")]} {
        # Already loaded: an unknown key simply has no answer.
        return ""
    }
    set NicoMapping("EstCharge") 1

    set Fichier "~scalzitti/These/G3PO_Benchmark/Data/ID_ensembl_new_version.csv"
    set LesColonnes [list Id Acc IdT IdG]
    foreach Ligne [LesLignesDuFichier $Fichier] {
        set LesValeurs [lrange [split $Ligne ";"] 0 3]
        # Skip the header line.
        if {[lindex $LesValeurs 0] eq "ID_UNIPROT"} { continue }
        # Every column value becomes a key giving access to all four columns.
        foreach Source $LesColonnes ValeurSource $LesValeurs {
            lappend NicoMapping(ListOf,$Source) $ValeurSource
            foreach Cible $LesColonnes ValeurCible $LesValeurs {
                set NicoMapping($ValeurSource,$Cible) $ValeurCible
            }
        }
    }
    # Second pass: answered from the cache, or "" through the EstCharge guard.
    return [NicoMapping $Qui $Quoi]
}

proc CarExVerif {{Liste ""}} {
    # For each Nom of Liste (default: all names from ListeDesPABs), counts how
    # many of its ids (bbsc Nom listof id) have a non-empty CDS according to
    # CarEx, and reports one summary line per Nom through Espionne.
    # Returns the constant string finCarExVerif.
    if {$Liste==""} { set Liste [ListeDesPABs] }
    foreach Nom $Liste {
        set GN [ExtraitInfo $Nom "ValiGN:"]
        set NbNon 0
        set NbOui 0
        foreach Id [bbsc $Nom listof id] {
            if {[CarEx $Id cds]==""} {
                incr NbNon
            } else {
                incr NbOui
            }
        }
        set Nb [expr {$NbOui+$NbNon}]
        # A Nom without any id must not trigger a division by zero.
        set PcOui [expr {$Nb>0 ? (100*$NbOui)/$Nb : 0}]
        Espionne [format "%s %-12s %4d %s %5d %s %s %5d %3d%s de oui" $Nom $GN $NbNon non $NbOui oui sur $Nb $PcOui "%"]
    }
    return finCarExVerif
}

proc CarEx {{Qui ""} {Quoi ""} {Quid ""}} {
    # Cached accessor for exon maps (.gar files) and genomic sequences
    # (.fasta files). See the help text built below for the supported queries;
    # calling CarEx without argument returns that help.
    #   Qui  is upper-cased, Quoi and Quid are lower-cased before lookup.
    # Everything is loaded on the first call that misses the cache. After the
    # load, "exon N", "intron N" and "cds" are computed on demand from the
    # genomic sequence file; any other unknown query returns "".
    global CarEx
    set Qui  [string toupper $Qui]
    set Quoi [string tolower $Quoi]
    set Quid [string tolower $Quid]
    if {$Qui=="" && $Quoi=="" && $Quid==""} { set Qui "HELP" }
    if {[info exists CarEx($Qui,$Quoi,$Quid)]} { return $CarEx($Qui,$Quoi,$Quid) }
    if {[info exists CarEx("EstCharge")]} {
        if {$Quoi=="exon" || $Quoi=="intron"} {
            if {[info exists CarEx($Qui,${Quoi}_$Quid,limits)] && [info exists CarEx($Qui,sg,filename)]} {
                set Limits $CarEx($Qui,${Quoi}_$Quid,limits)
                set Seq [QueLaSequenceDuFichierTFA $CarEx($Qui,sg,filename)]
                # The leading n shifts the sequence so that limits are 1-based.
                return [string range n$Seq {*}$Limits]
            }
        }
        if {$Quoi=="" || $Quoi=="cds"} {
            if {[info exists CarEx($Qui,exon,limits)] && [info exists CarEx($Qui,sg,filename)]} {
                set Seq [QueLaSequenceDuFichierTFA $CarEx($Qui,sg,filename)]
                set Cds ""
                # The CDS is the concatenation of all exons, in stored order.
                foreach Limit $CarEx($Qui,exon,limits) {
                    lassign $Limit D F
                    append Cds [string range n$Seq $D $F]
                }
                return $Cds
            }
        }
        return ""
    }
    set CarEx("EstCharge") 1

    set LeHelp {}
    lappend LeHelp "CarEx Help"
    lappend LeHelp "CarEx listof id" 
    lappend LeHelp "CarEx T1JH94_STRMM ce filename     ;# (carte exonique)"
    lappend LeHelp "CarEx T1JH94_STRMM sg filename     ;# (séquence génomique)"
    lappend LeHelp "CarEx T1JH94_STRMM listof exon     ;# 1 2 3 4"
    lappend LeHelp "CarEx T1JH94_STRMM listof intron   ;# 1 2 3"
    lappend LeHelp "CarEx T1JH94_STRMM exon limits     ;# {1 320} {380 466} {534 665} {776 934}"
    lappend LeHelp "CarEx T1JH94_STRMM exon_3 start    ;# 534"
    lappend LeHelp "CarEx T1JH94_STRMM exon_3 end      ;# 665"
    lappend LeHelp "CarEx T1JH94_STRMM exon_3 limits   ;# 534 665"
    lappend LeHelp "CarEx T1JH94_STRMM exon 3          ;# GATAGTTTAATTCGAAAAAAAT... etc"
    lappend LeHelp "CarEx T1JH94_STRMM intron limits"
    lappend LeHelp "CarEx T1JH94_STRMM intron_2 start"
    lappend LeHelp "CarEx T1JH94_STRMM intron_2 end"
    lappend LeHelp "CarEx T1JH94_STRMM intron_2 limits"
    lappend LeHelp "CarEx T1JH94_STRMM intron 2"
    lappend LeHelp "CarEx T1JH94_STRMM cds             ;# il y a meme le CDS"
    lappend LeHelp "CarEx T1JH94_STRMM                 ;#  ... pour rien"
    set CarEx(HELP,,) [join $LeHelp "\n"]

    # Start from empty lists so that missing data directories do not make the
    # final bookkeeping fail on unset array elements.
    set CarEx(LISTOF,sg,id) {}
    set CarEx(LISTOF,ce,id) {}
    set CarEx(LISTOF,nom,)  {}

    set SeqGenDir  "/home/scalzitti/These/G3PO_Benchmark/Data/Fasta/0Kb"
    set CarExoDir  "/home/scalzitti/These/G3PO_Benchmark/Data/Structures_map/References"

    # Genomic sequences: one fasta file per id.
    foreach FicSeqGen [lsort [glob -nocomplain "$SeqGenDir/*.fasta"]] {
        set Queue [file tail $FicSeqGen]
        # Braced pattern: in double quotes the backslash would be lost.
        regsub {\.fasta$} $Queue "" Id
        lappend CarEx(LISTOF,sg,id) $Id
        set CarEx($Id,sg,filename) $FicSeqGen
    }

    # Exon maps: one gar file per id.
    foreach FicCarExo [lsort [glob -nocomplain "$CarExoDir/*.gar"]] {
        set Queue [file tail $FicCarExo]
        regsub {\.gar$} $Queue "" Id
        lappend CarEx(LISTOF,ce,id) $Id
        set CarEx($Id,ce,filename) $FicCarExo
        set Nom [bbsc $Id nom]
        lappend CarEx(LISTOF,nom,)    $Nom
        lappend CarEx($Id,listof,nom) $Nom
        set NOM [string toupper $Nom]
        lappend CarEx($NOM,listof,id) $Id

        # Reverse must be defined even when no line of the file matches, and
        # must not be inherited from the previously read file.
        set Reverse 0
        foreach Ligne [LesLignesDuFichier $FicCarExo] {
            if { ! [regexp -nocase {^([a-z]+)_([0-9]+)[ \t]+([0-9]+)[ \t]+([0-9]+)} $Ligne Match EI I D F]} { continue }
            # A -1 anywhere in the line flags the entry as reversed.
            # NOTE(review): the flag of the last matching line wins; presumably
            # all lines of one file agree. Confirm against the gar format.
            set Reverse [regexp {\-1} $Ligne]
            set ei [string tolower $EI]
            lappend CarEx($Id,listof,$ei) $I
            # Normalise so that start is never greater than end.
            set W [Maxi $D $F]
            set D [Mini $D $F]
            set F $W
            set CarEx($Id,${ei}_$I,limits) "$D $F"
            set CarEx($Id,${ei}_$I,start)  $D
            set CarEx($Id,${ei}_$I,end)    $F
            lappend CarEx($Id,$ei,i) $I
            lappend CarEx($Id,$ei,limits) "$D $F"
        }
        if {$Reverse} {
            foreach ei [list exon intron] {
                if { ! [info exists CarEx($Id,$ei,i)]} { continue }
                set CarEx($Id,$ei,limits) [lreverse $CarEx($Id,$ei,limits)]
                set N [llength $CarEx($Id,$ei,i)]
                foreach What [list limits start end] {
                    # Element number I takes the values of element N-I+1.
                    # All new values are collected before any is written back.
                    array unset Nouveau
                    foreach I $CarEx($Id,$ei,i) {
                        set Nouveau($I) $CarEx($Id,${ei}_[expr {$N-$I+1}],$What)
                    }
                    foreach I $CarEx($Id,$ei,i) {
                        # Write the array element itself; the previous code wrote
                        # to a variable named after the element's value, so the
                        # renumbering was silently lost.
                        set CarEx($Id,${ei}_$I,$What) $Nouveau($I)
                    }
                }
            }
        }
    }
    foreach Id $CarEx(LISTOF,ce,id) {
        if {[info exists CarEx($Id,listof,nom)]} { set CarEx($Id,listof,nom) [lsort -unique $CarEx($Id,listof,nom)] }
    }
    set CarEx(LISTOF,nom,) [lsort -unique $CarEx(LISTOF,nom,)]
    set CarEx(LISTOF,ce,) $CarEx(LISTOF,ce,id)
    set CarEx(LISTOF,id,) $CarEx(LISTOF,ce,id)
    return [CarEx $Qui $Quoi $Quid]
}

proc CreateMacsimXmlNuc {} {
    # Converts, for every name of ListeDesPABs, the files msfNuc, msfNuc3 and
    # msfNucProt3 into macsimXmlNuc, macsimXmlNuc3 and macsimXmlNucProt3 by
    # running clustalw. Works from inside the genome directory.
    # Returns the constant string Bravo.
    set LesTypes [list Nuc Nuc3 NucProt3]

    NousAllonsAuBoulot [RepertoireDuGenome]
    foreach Type $LesTypes {
        file mkdir macsimXml$Type
    }
    foreach Nom [ListeDesPABs] {
        Espionne $Nom
        foreach Type $LesTypes {
            set Source [GscopeFile $Nom msf$Type]
            set Cible  [GscopeFile $Nom macsimXml$Type]
            exec clustalw $Source -convert -output=macsims -outfile=$Cible -outorder=input
        }
    }
    OnRevientDuBoulot
    return Bravo
}

proc CreateNucAliFromProtAliBBSCPourTous {} {
    # Runs CreateNucAliFromProtAliBBSC on every name given by ListeDesPABs.
    # Returns the constant string Bravo.
    set LesNoms [ListeDesPABs]
    foreach NomCourant $LesNoms {
        CreateNucAliFromProtAliBBSC $NomCourant
    }
    return Bravo
}

proc CreateNucAliFromProtAliBBSC Nom {
    # Builds a nucleotide alignment for Nom from its protein alignment
    # (msfProt): every aligned residue is replaced by the corresponding codon
    # of the nucleotide sequence of the same access, every gap by three dashes.
    # Writes the nucalitfa file, converts it to msfNuc with clustalw and derives
    # msfNuc3 and msfNucProt3 through MsfOnOneLine.
    # Returns the msfNuc file name, or "" when there is no usable protein
    # alignment.
    foreach Rep [list nucalitfa msfNuc msfNuc3 msfNucProt3] {
        file mkdir "[RepertoireDuGenome]/$Rep"
    }
    set FichierMsfProt [GscopeFile $Nom msfProt]
    if {[FileAbsent $FichierMsfProt]} { return "" }

    set MsfProt [ContenuDuFichier $FichierMsfProt]
    if {[regexp "^NoSequencesToAlign" $MsfProt]} { return "" }

    # Convert the protein alignment to aligned FASTA to read it row by row.
    set FichierMsfTfa [TmpFile].tfa
    exec clustalw $FichierMsfProt -convert -output=FASTA -outfile=$FichierMsfTfa
    set LeMsfTfaNuc {}
    foreach Access [LaSequenceDuTFAs $FichierMsfTfa "LaListeDesAccess"] {
        set SeqProtTfa [QueLaSequenceDuTexteTFA [bbsc $Access prottfa content]]
        set SeqNucTfa  [QueLaSequenceDuTexteTFA [bbsc $Access nuctfa content]]
        Espionne $Nom $Access [expr {[string length $SeqProtTfa]*3}] [string length $SeqNucTfa]
        if {$SeqNucTfa==""} { Espionne "$Access est vide" ; continue }

        # Codon table of this access only. Without the reset, codons of the
        # previous access were reused where this one has none.
        unset -nocomplain CodonEn
        set I -1
        foreach {a b c} [split $SeqNucTfa ""] {
            incr I
            set Codon "$a$b$c"
            # Only complete codons are kept; a truncated one would break the
            # three-columns-per-residue layout and is replaced by NNN below.
            if {[string length $Codon]==3} { set CodonEn($I) $Codon }
        }

        set TfaProt [LaSequenceDuTFAs $FichierMsfTfa $Access]
        set SeqProt [QueLaSequenceDuTexteTFA $TfaProt]
        set LaSeqNucAlignee {}
        set I -1
        foreach A [split $SeqProt ""] {
            if {$A=="-" || $A=="."} { lappend LaSeqNucAlignee "---" ; continue }
            incr I
            if {[info exists CodonEn($I)]} {
                lappend LaSeqNucAlignee [string toupper $CodonEn($I)]
            } else {
                # No nucleotides available for this residue.
                lappend LaSeqNucAlignee "NNN"
            }
        }
        lappend LeMsfTfaNuc ">$Access homolog from $Nom nucaligment from protaligment"
        lappend LeMsfTfaNuc [join $LaSeqNucAlignee ""]
    }

    set FichierNucAliTfa [SauveLesLignes $LeMsfTfaNuc dans [GscopeFile $Nom "nucalitfa"]]
    set FichierMsfNuc [GscopeFile $Nom "msfNuc"]
    exec clustalw $FichierNucAliTfa -convert -output=gscope -outfile=$FichierMsfNuc
    MsfOnOneLine $Nom msfNuc msfNuc3     "" ShowCodons
    MsfOnOneLine $Nom msfNuc msfNucProt3 "" ShowCodons
    return $FichierMsfNuc
}

proc bbsc {{Qui ""} {Quoi ""} {Quid ""}} {
    # Cached accessor for the project data: names, the access_id of every
    # sequence of each protein alignment, and the files attached to them.
    # "bbsc help" (or no argument) returns the list of supported queries.
    # Everything is loaded on the first call that misses the cache. After the
    # load, a query by ac or id is redirected to the full access_id, a query on
    # an access_id may be redirected to its Nom, and "content" queries return
    # the content of the corresponding file. Unknown queries return "".
    global bbsc
    if {[string equal -nocase $Qui "Help"] || ($Qui=="" && $Quoi=="")} {
        set LeHelp {}
        lappend LeHelp "bbsc help"
        lappend LeHelp "bbsc listof nom"
        lappend LeHelp "bbsc listof acid"
        lappend LeHelp "bbsc listof ac"
        lappend LeHelp "bbsc listof id"
        lappend LeHelp "bbsc listof human acid"
        lappend LeHelp "bbsc listof human ac"
        lappend LeHelp "bbsc listof human id"
        lappend LeHelp "bbsc listof cds count"
        lappend LeHelp "bbsc listof human acid"
        lappend LeHelp "bbsc BBSC01 aliprot"
        lappend LeHelp "bbsc BBSC01 alinuc"
        lappend LeHelp "bbsc BBSC01 msf ... or msfProt msfNuc msfNuc3 msfNucProt3 macsimXml macsimRsf"
        lappend LeHelp "bbsc BBSC01 acid"
        lappend LeHelp "bbsc BBSC01 ac"
        lappend LeHelp "bbsc BBSC01 id"
        lappend LeHelp "bbsc BBSC01 cds count"
        lappend LeHelp "bbsc BBSC01 listof org"
        lappend LeHelp "bbsc BBSC01 listof copsaliprot"
        lappend LeHelp "bbsc BBSC01 listof acid"
        lappend LeHelp "bbsc BBSC01 listof ac"
        lappend LeHelp "bbsc BBSC01 listof id"
        lappend LeHelp "bbsc BBSC01 listof cds"
        lappend LeHelp "bbsc BBSC01 listof missingcds"
        lappend LeHelp "bbsc Q9DBI2_BBS10_MOUSE nom"
        lappend LeHelp "bbsc Q9DBI2_BBS10_MOUSE acid"
        lappend LeHelp "bbsc Q9DBI2_BBS10_MOUSE ac"
        lappend LeHelp "bbsc Q9DBI2_BBS10_MOUSE id"
        lappend LeHelp "bbsc Q9DBI2_BBS10_MOUSE nuctfa filename"
        lappend LeHelp "bbsc Q9DBI2_BBS10_MOUSE nuctfa content"
        lappend LeHelp "bbsc BBS10_MOUSE nom"
        lappend LeHelp "bbsc BBS10_MOUSE acid"
        lappend LeHelp "bbsc BBS10_MOUSE ac"
        lappend LeHelp "bbsc BBS10_MOUSE id"
        lappend LeHelp "bbsc BBS10_MOUSE nuctfa filename"
        lappend LeHelp "bbsc BBS10_MOUSE nuctfa content"
        lappend LeHelp "bbsc Q9DBI2 nom"
        lappend LeHelp "bbsc Q9DBI2 acid"
        lappend LeHelp "bbsc Q9DBI2 ac"
        lappend LeHelp "bbsc Q9DBI2 id"
        lappend LeHelp "bbsc Q9DBI2 nuctfa filename"
        lappend LeHelp "bbsc Q9DBI2 nuctfa content"
        lappend LeHelp "bbsc MOUSE listof acid"
        lappend LeHelp "bbsc listof content"
        return [join $LeHelp "\n"]
    }
    set LesContent [list aliprot alinuc nuctfa prottfa msf msfProt msfNuc msfNuc3 msfNucProt3 macsimXml macsimRsf]
    set bbsc(listof,content,) $LesContent
    if {[info exists bbsc($Qui,$Quoi,$Quid)]} { return $bbsc($Qui,$Quoi,$Quid) }
    if {[info exists bbsc("EstCharge")]} {
        # A file type without Quid means its content. -exact avoids the glob
        # matching that lsearch performs by default.
        if {$Quid=="" && [lsearch -exact $LesContent $Quoi]>=0 } { set Quid "content" }
        # Qui is an ac or an id: go through the full access_id.
        if {[info exists bbsc($Qui,acid,)] && $bbsc($Qui,acid,)!=$Qui} {
            set AcId $bbsc($Qui,acid,)
            if {[info exists bbsc($AcId,$Quoi,$Quid)]} {
                return [bbsc $AcId $Quoi $Quid]
            }
            if {$Quid=="content" && [info exists bbsc($AcId,nom,)] && [info exists bbsc($bbsc($AcId,nom,),$Quoi,filename)]} {
                return [ContenuDuFichier $bbsc($bbsc($AcId,nom,),$Quoi,filename)]
            }
        }
        # Qui is an access_id: go through the Nom it belongs to.
        if {[info exists bbsc($Qui,nom,)]} {
            if {[info exists bbsc($bbsc($Qui,nom,),$Quoi,$Quid)]} {
                return [bbsc $bbsc($Qui,nom,) $Quoi $Quid]
            }
            if {$Quid=="content" && [info exists bbsc($bbsc($Qui,nom,),$Quoi,filename)]} {
                return [bbsc $bbsc($Qui,nom,) $Quoi $Quid]
            }
        }
        if {$Quid=="content" && [info exists bbsc($Qui,$Quoi,filename)]} {
            return [ContenuDuFichier $bbsc($Qui,$Quoi,filename)]
        }
        return ""
    }
    set bbsc("EstCharge") 1
    set RG [RepertoireDuGenome]
    foreach Nom [ListeDesPABs] {
        lappend bbsc(listof,nom,) $Nom
        foreach Content $LesContent {
            set bbsc($Nom,$Content,filename) "$RG/$Content/$Nom"
        }
        set FicAliprot "$RG/aliprot/$Nom"
        set LesCopAli [LaSequenceDuTFAs $FicAliprot "LaListeDesAccess"]
        set bbsc($Nom,listof,copsaliprot) $LesCopAli
        set bbsc($Nom,acid,) [lindex $LesCopAli 0]
        set RepCopsNuctfa  "$RG/copsnuctfa/$Nom"
        set RepCopsProttfa "$RG/copsprottfa/$Nom"
        set bbsc($Nom,listof,cds) {}
        set bbsc($Nom,listof,missingcds) {}
        set AcIdHuman ""
        foreach AcId $LesCopAli {
            # An access_id has the form ac_id_organism.
            lassign [split $AcId "_"] A I O
            set Ac $A
            set Id ${I}_$O
            lappend bbsc($Nom,listof,acid) $AcId
            lappend bbsc($Nom,listof,ac)   $Ac
            lappend bbsc($Nom,listof,id)   $Id
            if {$AcIdHuman==""} {
                # The first sequence of the alignment is the reference of Nom.
                set AcIdHuman $AcId
                set IdHuman   $Id
                set AcHuman   $Ac
                set bbsc($Nom,acid,) $AcIdHuman
                set bbsc($Nom,ac,)   $AcHuman
                set bbsc($Nom,id,)   $IdHuman
                lappend bbsc(listof,human,acid) $AcIdHuman
                lappend bbsc(listof,human,ac)   $AcHuman
                lappend bbsc(listof,human,id)   $IdHuman
            }
            lappend bbsc($Nom,listof,org)  $O
            lappend bbsc(listof,org,)      $O
            lappend bbsc($O,listof,nom)    $Nom
            lappend bbsc($O,listof,acid)   $AcId
            lappend bbsc($AcId,nom,)       $Nom
            lappend bbsc($AcId,acid,)      $AcId
            lappend bbsc($AcId,ac,)        $Ac
            lappend bbsc($AcId,id,)        $Id
            lappend bbsc($Ac,acid,)        $AcId
            lappend bbsc($Id,acid,)        $AcId
            lappend bbsc(listof,acid,)     $AcId
            lappend bbsc(listof,ac,)       $Ac
            lappend bbsc(listof,id,)       $Id

            set FicCopProttfa "$RepCopsProttfa/$AcId"
            set bbsc($AcId,prottfa,filename) $FicCopProttfa

            # The nucleotide file is registered only when it exists on disk.
            set FicCopNuctfa "$RepCopsNuctfa/$AcId"
            if {[file exists $FicCopNuctfa]} {
                lappend bbsc($Nom,listof,cds) $AcId
                set bbsc($AcId,nuctfa,filename) $FicCopNuctfa
            } else {
                lappend bbsc($Nom,listof,missingcds) $AcId
            }
        }
        set P [llength $bbsc($Nom,listof,cds)]
        set A [llength $bbsc($Nom,listof,missingcds)]
        set T [expr {$A+$P}]
        # An alignment without any sequence must not abort the whole load with
        # a division by zero.
        set PC [expr {$T>0 ? (100*$A)/$T : 0}]
        set bbsc($Nom,cds,count) [format "%4d %s %3d %s %3d %s %3d%s %s" $T cops $P cds $A missingcds $PC % missing]
        lappend bbsc(listof,cds,count) "$Nom $bbsc($Nom,cds,count)"
        set bbsc($Nom,aliprot,filename) "$RG/aliprot/$Nom"
        set bbsc($Nom,alinuc,filename)  "$RG/alinuc/$Nom"
    }
    return [bbsc $Qui $Quoi $Quid]
}
	   
proc CdsFromRefseq Refseq {
    # Fetches the nucleotide record of Refseq through eFetchREST and returns
    # the sequence of its first CDS feature, built from the feature location.
    # Only plain locations made of start..end ranges separated by commas are
    # handled; anything else is reported through FaireLire and gives "".
    JeMeSignale
    set GB [eFetchREST nucleotide $Refseq]
    DecortiqueGenBank OS OC SeqADN $GB

    set CdsTrouve [regexp {\n +CDS +([^\n]+)\n} $GB Match Limits]
    if {!$CdsTrouve} {
        Espionne $GB
        FaireLire "pas de cds pour $Refseq !!!!!"
        return ""
    }

    # Drop the partial-feature markers before checking the location.
    set Limits [string map [list "<" "" ">" ""] $Limits]
    if {[regexp {[^0-9\,\.]} $Limits]} {
        Espionne $GB
        Espionne $Limits
        FaireLire "bad limits $Limits !!!!!"
        return ""
    }

    set LesMorceaux {}
    foreach Limit [split $Limits ","] {
        # Splitting on the dot leaves an empty element between the two dots.
        lassign [split $Limit "."] D Bidon F
        # Locations are 1-based, string indices are 0-based.
        lappend LesMorceaux [string range $SeqADN $D-1 $F-1]
    }
    return [join $LesMorceaux ""]
}

proc MrnaDesCopainsPourTous {{Liste ""}} {
    # For every Nom of Liste (default: all names from ListeDesPABs) and every
    # protein file of copsprottfa/Nom that has no nucleotide file yet, collects
    # candidate CDS (RefSeq ones through IDMapping and CdsFromRefseq, Ensembl
    # ones through NicoMapping and CdsFromDec2016), keeps the candidate whose
    # translation has the fewest mismatches with the protein, and saves it in
    # copsnuctfa/Nom when the mismatch rate is at most 9 percent.
    # Returns the list of "Nom Copain header" lines of the saved sequences.
    set LaSortie {}
    set CopsNucTfaDir  "[RepertoireDuGenome]/copsnuctfa"
    set CopsProtTfaDir "[RepertoireDuGenome]/copsprottfa"
    file mkdir $CopsNucTfaDir
    if {$Liste==""} { set Liste [ListeDesPABs] }
    foreach Nom $Liste {
        set MesNucTfa "$CopsNucTfaDir/$Nom"
        file mkdir $MesNucTfa
        foreach FicProtTfa [lsort [glob -nocomplain "$CopsProtTfaDir/$Nom/*"]] {
            set Entete [EnteteDuFichierTFA $FicProtTfa]
            regsub "^>" $Entete "" Entete
            scan $Entete "%s" Copain
            lassign [split $Copain "_"] A B O
            set AC $A
            set ID ${B}_$O
            set FicNucTfa  "$MesNucTfa/$Copain"
            if {[file exists $FicNucTfa]} { continue }
            Espionne ya pas $FicNucTfa

            # Candidates of this homolog only. The list used to be reset once
            # per Nom, so a homolog could be given the CDS of a previous one.
            set LesTfa {}

            set Refseqs [IDMapping ACC+ID REFSEQ_NT $AC]
            foreach LesRefseq $Refseqs {
                foreach Refseq $LesRefseq {
                    if {$Refseq!=""} {
                        set Cds [CdsFromRefseq $Refseq]
                        set Entete "$Refseq CDS from REFSEQ_NT $Refseq from ACC $AC from $Nom"
                        set Tfa [SequenceFormatTFA $Cds $Entete "nucbrut"]
                        lappend LesTfa $Tfa
                    }
                }
            }

            # The transcript from the mapping file is used too: looked up by
            # AC first, then by ID.
            set IdT [NicoMapping $AC IdT]
            if {$IdT==""} { set IdT [NicoMapping $ID IdT] }
            if {$IdT!=""} {
                set Cds [CdsFromDec2016 $IdT]
                set Entete "$IdT CDS from dec2016.ensembl $IdT from ACC $AC from $Nom"
                set Tfa [SequenceFormatTFA $Cds $Entete "nucbrut"]
                lappend LesTfa $Tfa
            }
            Espionne $AC $ID =$IdT=

            if {$LesTfa=={}} { continue }

            set Pro [QueLaSequenceDuFichierTFA $FicProtTfa]
            set Max [string length $Pro]
            # An empty protein cannot be scored (division by zero below).
            if {$Max==0} { continue }

            # Keep the candidate with the smallest number of mismatches. The
            # comparison is numeric; on a tie the last candidate wins.
            set Mismatch ""
            foreach Candidat $LesTfa {
                set Nuc [QueLaSequenceDuTexteTFA $Candidat]
                # Only the part covering the protein is translated.
                set Nuc [string range $Nuc 0 [expr {$Max*3-1}]]
                set NucTraduit [SeqNucToSeqPro $Nuc]
                set N 0
                foreach P [split $Pro ""] T [split $NucTraduit ""] {
                    if {$P eq $T} { incr N }
                }
                set MismatchCandidat [expr {$Max-$N}]
                if {$Mismatch eq "" || $MismatchCandidat<=$Mismatch} {
                    set Mismatch $MismatchCandidat
                    set Tfa $Candidat
                }
            }

            set Seq [QueLaSequenceDuTexteTFA $Tfa]
            set EnteteIsoform [EnteteDuTexteTFA $Tfa]
            set Score [expr {(100.*$Mismatch)/$Max}]
            # NOTE(review): at this point Entete holds the header of the last
            # candidate built above, not necessarily the selected one; kept as
            # it was, to be confirmed.
            regsub "^>" $EnteteIsoform "$Entete " EnteteMrna
            append EnteteMrna " $Mismatch/$Max mismatch [format {%5.2f} $Score]%"
            if {$Score>9} { continue }
            Espionne Youpiiiiiiiii $EnteteMrna $FicNucTfa
            Sauve [SequenceFormatTFA $Seq $EnteteMrna nucbrut] dans $FicNucTfa
            lappend LaSortie "$Nom $Copain $EnteteIsoform"
        }
    }
    return $LaSortie
}

proc MrnaFromPourTous {} {
    # For every name of ListeDesPABs:
    #  - when its nuctfa file exists, checks that the nucleotide sequence
    #    (without final stop codon) translates to the prottfa sequence and
    #    reports any discrepancy through FaireLire;
    #  - otherwise fetches the mRNA with MrnaFrom and saves it as nuctfa.
    # Returns the list of names for which a nuctfa file was created.
    set LaSortie {}
    file mkdir "[RepertoireDuGenome]/nuctfa"
    foreach Nom [ListeDesPABs] {
        set FicNucTfa  [GscopeFile $Nom "nuctfa"]
        set FicProtTfa [GscopeFile $Nom "prottfa"]
        if {[file exists $FicNucTfa]} {
            set Nuc [QueLaSequenceDuTFA $FicNucTfa]
            regsub -nocase {(TAA|TAG|TGA)$} $Nuc "" Nuc
            set Pro [QueLaSequenceDuTFA $FicProtTfa]
            set LongueurAttendue [expr {[string length $Pro]*3}]
            if {[string length $Nuc]!=$LongueurAttendue} {
                FaireLire "$Nom [string length $Nuc]!=[string length $Pro]*3"
            }
            # Retry the comparison on the part covering the protein.
            set Nuc [string range $Nuc 0 [expr {$LongueurAttendue-1}]]
            if {[string length $Nuc]==$LongueurAttendue} {
                set NucTraduit [SeqNucToSeqPro $Nuc]
                if {$Pro ne $NucTraduit} {
                    set N 0
                    foreach P [split $Pro ""] T [split $NucTraduit ""] {
                        if {$P ne $T} { incr N }
                    }
                    FaireLire "$N ratés pour $Nom\n$Pro\n$NucTraduit"
                }
            }
            continue
        }
        set LesTfa [MrnasFrom $Nom]
        if {$LesTfa=={}} { Espionne "je ne rouve rien du tout pour $Nom" ; continue }
        set Seq [MrnaFrom $Nom]
        set Seq [string toupper $Seq]
        lappend LaSortie $Nom
        set AC [ExtraitInfo $Nom "AC:"]
        # The identifier comes from the ID field; it used to be read from the
        # AC field, which produced headers of the form AC_AC.
        set ID [ExtraitInfo $Nom "ID:"]
        Sauve [SequenceFormatTFA $Seq "$Nom ${AC}_$ID" nucbrut] dans $FicNucTfa
    }
    return $LaSortie
}

proc CdsFromDec2016 ENST {
    # Returns what ContenuDuFichier reads from the dec2016 Ensembl REST
    # sequence URL requesting the CDS of ENST as plain text.
    set Url "https://dec2016.rest.ensembl.org/sequence/id/${ENST}?type=cds&content-type=text/plain"
    return [ContenuDuFichier $Url]
}

proc MrnasFrom {Qui {FirstSeqOnly ""}} {
    # Returns the list of TFA texts of all the mRNA (CDS) found for Qui, going
    # from the accession to its CCDS and from each CCDS to its Ensembl
    # transcripts. Qui is an accession, or a name accepted by EstUnPAB, in
    # which case the accession is read from its AC: info.
    # With FirstSeqOnly set to "FirstSeqOnly" (case-insensitive) the raw
    # sequence of the first transcript found is returned instead.
    # Returns "" when no CCDS is known for Qui.
    set FirstSeqOnly [string equal -nocase "FirstSeqOnly" $FirstSeqOnly]
    set LesMrnas {}
    set Nom $Qui
    if {[EstUnPAB $Qui]} { set Qui [ExtraitInfo $Qui "AC:"] }

    # A stray "return $Seq" used to stand here: Seq is not defined at this
    # point, so every call failed and the code below was never reached.
    set CCDSs [lindex [IDMapping ACC+ID CCDS $Qui]  0]
    if {$CCDSs=={}} {
        # The accession may need an extension -1 -2 ... up to -9.
        foreach I [NombresEntre 1 9] {
            set CCDSs [lindex [IDMapping ACC+ID CCDS "$Qui-$I"]  0]
            if {$CCDSs!={}} { break }
        }
        if {$CCDSs=={}} { return "" }
    }
    foreach CCDS $CCDSs {
        set ENSTs [lindex [IDMapping CCDS ENSEMBL_TRS $CCDS] 0]
        if {$ENSTs=={}} { continue }
        foreach ENST $ENSTs {
            set Seq [ContenuDuFichier "https://rest.ensembl.org/sequence/id/$ENST?type=cds&content-type=text/plain"]
            if {$Seq!=""} {
                if {$FirstSeqOnly} { return $Seq }
                set Entete "$ENST mRNA from CCDS $CCDS from ACC $Qui from $Nom"
                lappend LesMrnas [SequenceFormatTFA $Seq $Entete "nucbrut"]
            }
        }
    }
    return $LesMrnas
}


#rR voir plus bas  BlastAli BlastIndel BlastStat et  Blastomics
proc DGB {} {
    # Small manual check of DecortiqueGenBank: fetches one fixed protein
    # record, decodes it and shows the cross-references and the sequence
    # through Espionne. Returns the constant string DGB.
    set Fiche [lindex [eFetchREST protein "np_001297087"] 0]
    DecortiqueGenBank OS OC Sequence $Fiche 0 0 "" "" LesXrefs
    foreach AMontrer [list $LesXrefs $Sequence] {
        Espionne $AMontrer
    }
    return DGB
}

proc AliIndel {{Qui ""} {Quoi ""} {FicAli ""}} {
    # Cached accessor for the sequences of an alignment file in TFA format.
    #   AliIndel ListOf AC   list of the accessions
    #   AliIndel <AC> LE     length of the stored text of AC
    #   AliIndel <AC> SS     stored text of AC
    #   AliIndel Query LE, Query SQ, Query GQ   same for the first sequence
    #                        (SQ keeps only the upper-case letters)
    # FicAli selects the alignment file; when it differs from the file
    # currently loaded, the whole cache is dropped and reloaded.
    # Returns "" for an unknown key or when no file was ever given.
    global AliIndel

    if {$FicAli!=""} {
        # The key tested here must be the one written at load time. The test
        # used a differently quoted key, so the cache was never invalidated.
        if {[info exists AliIndel(FicAli,)] && $AliIndel(FicAli,) ne $FicAli} {
            array unset AliIndel
        }
        set AliIndel("FicAliMemo") $FicAli
    }
    if {[info exists AliIndel($Qui,$Quoi)]} { return $AliIndel($Qui,$Quoi) }
    if {[info exists AliIndel("EstCharge")]} { return "" }
    # Nothing can be loaded before a file name has been given.
    if { ! [info exists AliIndel("FicAliMemo")]} { return "" }
    set AliIndel("EstCharge") 1
    set AliIndel(FicAli,) $AliIndel("FicAliMemo")

    set Query ""
    foreach Access [LaSequenceDuTFAs $AliIndel(FicAli,) "LaListeDesAccess"] {
        # NOTE(review): elsewhere in this file the result of LaSequenceDuTFAs
        # is passed to QueLaSequenceDuTexteTFA before use; confirm that it is
        # really the bare gapped sequence that is wanted here.
        set SeqGap [LaSequenceDuTFAs $AliIndel(FicAli,) $Access]
        regsub {^[^_]+_} $Access "" AC
        regsub -all {[^A-Z]} $SeqGap "" Seq
        set LE [string length $SeqGap]
        lappend AliIndel(ListOf,AC) $AC
        set AliIndel($AC,LE) $LE
        set AliIndel($AC,SS) $SeqGap
        set AliIndel($AC,OS) "OS"; #[GeneQuid UniprotData $Access OS]
        set AliIndel($AC,DE) "DE"; #[GeneQuid UniprotData $Access DE]
        if {$Query==""} {
            # The first sequence is the query. Query was never assigned, so
            # these entries ended up describing the last sequence.
            set Query $AC
            set AliIndel(Query,LE) $LE
            set AliIndel(Query,SQ) $Seq
            set AliIndel(Query,GQ) $SeqGap
        }
    }
    return [AliIndel $Qui $Quoi]
}

proc AliStatPourTous {} {
    # Runs AliStat on every name given by ListeDesPABs and returns the list
    # of its results, in the same order.
    set LesResultats {}
    foreach NomCourant [ListeDesPABs] {
        lappend LesResultats [AliStat $NomCourant]
    }
    return $LesResultats
}

proc AliStat {{FichierAli ""} {CouOut ""} {PilOut ""}} {
    # Coverage statistics of an alignment, on the model of BlastStat.
    #   FichierAli : alignment file, or a name accepted by EstUnPAB, in which
    #                case its aliprot file is used and the default outputs go
    #                to the Couvert and Piliers directories of the genome
    #   CouOut     : output file, one line per sequence with the fields
    #                access;length;length of last zone;percent covered;0.0;zones;OS;DE
    #   PilOut     : output file, one line "position;count" per position, the
    #                count being the number of sequences with an upper-case
    #                letter at that position
    # Returns the list of the two output file names.
    if {[EstUnPAB $FichierAli]} {
        set Nom $FichierAli
        set FichierAli [GscopeFile $Nom "aliprot"]
        set RepCouvert "[RepertoireDuGenome]/Couvert"
        set RepPiliers "[RepertoireDuGenome]/Piliers"
        file mkdir $RepCouvert
        file mkdir $RepPiliers
        set AC [ExtraitInfo $Nom "AC:"]
        set ID [ExtraitInfo $Nom "ID:"]
        set ACID "${AC}_$ID"
        if {$CouOut==""} { set CouOut "$RepCouvert/${Nom}-$ACID.csv" }
        if {$PilOut==""} { set PilOut "$RepPiliers/${Nom}-$ACID.csv" }
    } else {
        if {$CouOut==""} { set CouOut $FichierAli.Couvert.csv  }
        if {$PilOut==""} { set PilOut $FichierAli.Piliers.csv  }
    }

    # Load (or reload) the alignment.
    AliIndel "" "" $FichierAli

    set QL [AliIndel Query LE]
    foreach I [NombresEntre 1 $QL] {
        set TaillePilier($I) 0
    }
    set PN 0.0
    set LesCouvert {}
    set LesPiliers {}
    foreach Access [AliIndel ListOf AC $FichierAli] {
        set LE [AliIndel $Access LE]
        set SS [AliIndel $Access SS]
        set DE [AliIndel $Access DE] ; regsub -all ";" $DE "," DE
        set OS [AliIndel $Access OS]
        # Zones are the runs of upper-case letters, as 0-based index pairs.
        set LesZones [regexp -all -indices -inline {[A-Z]+} $SS]
        set Couverture 0
        # Defined even without any zone, and never inherited from the
        # previous sequence.
        set LZ 0
        foreach Zone $LesZones {
            lassign $Zone D F
            set LZ [expr {$F-$D+1}]
            incr Couverture $LZ
            foreach I [NombresEntre $D $F] {
                # Pillars are numbered from 1, regexp indices start at 0.
                incr TaillePilier([expr {$I+1}])
            }
        }
        # An empty sequence has no coverage; avoids a division by zero.
        set PC [expr {$LE>0 ? round(100.*$Couverture/$LE) : 0}]

        set LaLigne [list $Access $LE $LZ $PC $PN $LesZones $OS $DE]
        lappend LesCouvert [join $LaLigne ";"]
    }
    foreach I [NombresEntre 1 $QL] {
        lappend LesPiliers "$I;$TaillePilier($I)"
    }
    if {$CouOut!=""} { SauveLesLignes $LesCouvert dans $CouOut }
    if {$PilOut!=""} { SauveLesLignes $LesPiliers dans $PilOut }
    return [list $CouOut $PilOut]
}

proc InformeBBSCPourTous {} {
    # For every name of ListeDesPABs: reads the access_id from the header of
    # its prottfa file, asks GeneQuid for the AC, ID, GN and DE lines of that
    # access, and records AC, ID, GN, DE, ValiGN and ValiDE with
    # InformeSansDemander. When the ID returned differs from the one of the
    # header it is recorded as BetterID. Inconsistencies are shown through
    # FaireLire. Returns the constant string bravo.
    file mkdir "[RepertoireDuGenome]/infos"
    foreach Nom [ListeDesPABs] {
        set TFA [ContenuDuFichier [GscopeFile $Nom prottfa]]
        scan $TFA "%s %s" Premier AccessId
        lassign [split $AccessId "_"] Access Gene Espece
        if {$Espece!="HUMAN"} { FaireLire "$AccessId pas bon" }

        set ClesEmbl [GeneQuid UniprotData $Access "+AC,+ID,+GN,+DE"]
        # One variable per expected key; the first line of each key wins.
        foreach Cle [list AC ID GN DE] { set $Cle "" }
        foreach Ligne [split $ClesEmbl "\n"] {
            if { ! [regexp {^([A-Z][A-Z])   (.*)$} $Ligne Match K V]} {
                FaireLire $Ligne
                continue
            }
            if {[set $K]!=""} { continue }
            set $K $V
        }
        # Keep the first accession and the first word of the ID line.
        regsub {;.*$} $AC "" AC
        regsub { .*$} $ID "" ID
        if {$Access!=$AC} {
            FaireLire "$Access\n$ClesEmbl"
        }

        set OriId ${Gene}_$Espece
        if {$ID!=$OriId} {
            set BetterId $ID
        } else {
            set BetterId ""
        }
        Espionne $AC $Access
        Espionne $ID $OriId

        # Reduce GN and DE to the gene name and the full name when present.
        regexp {Name=([^; ]+)[; ]} $GN Match GN
        regexp {Full=([^;]+);} $DE Match DE
        Espionne $GN
        Espionne $DE

        InformeSansDemander $Nom "AC: $AC\nID: $OriId\nGN: $GN\nDE: $DE\nValiGN: $GN\nValiDE: $DE"
        if {$BetterId!=""} { InformeSansDemander $Nom "BetterID: $ID" }
    }
    return bravo
}

proc CreateMsfAndMacsimFromTfaPourTous {} {
    # Converts the aliprot file of every name of ListeDesPABs to an msf file
    # with clustalw. The macsimXml and macsimRsf directories are created and
    # the file names computed, but those two conversions are not run.
    # Returns the empty string.
    foreach Rep [list msf macsimXml macsimRsf] {
        file mkdir "[RepertoireDuGenome]/$Rep"
    }
    foreach Nom [ListeDesPABs] {
        Espionne $Nom
        set FicTfa    [GscopeFile $Nom aliprot]
        set FicMsf    [GscopeFile $Nom msf]
        set FicMacXml [GscopeFile $Nom macsimXml]
        set FicMacRsf [GscopeFile $Nom macsimRsf]
        exec clustalw $FicTfa -convert -output=gscope    -outfile=$FicMsf
    }
    return ""
}

proc CreateProjectBBSC {} {
    # Builds the BBSC project from the alignments DeKirsley/*.tfa, working
    # inside /genomics/link/BBSC. Each alignment, in sorted order, becomes
    # BBSC01, BBSC02, ... :
    #   aliprot/Nom                  copy of the alignment
    #   prottfa/Nom                  first sequence of the alignment, ungapped
    #   copsprottfa/Nom/Access       every sequence, ungapped
    #   copsprottfagap/Nom/Access    every sequence, with its gaps
    # Ends by calling CreeBornesDesPABsTroisGradins for names 1 to the last.
    # Returns the constant string CreateProjectBBSC.
    NousAllonsAuBoulot "/genomics/link/BBSC"
    foreach Rep [list aliprot fiches prottfa copsprottfagap copsprottfa] {
        file mkdir $Rep
    }
    set PF "BBSC"
    set Numero 0
    foreach FicOrigine [lsort [glob -nocomplain "DeKirsley/*.tfa"]] {
        incr Numero
        set Nom [format "%s%2.2d" $PF $Numero]
        set FicAli "aliprot/$Nom"
        file copy -force $FicOrigine $FicAli

        # Human stays empty until the first sequence has been seen.
        set Human ""
        foreach Access [LaSequenceDuTFAs $FicAli "LaListeDesAccess"] {
            Espionne $Access
            set SeqGap [QueLaSequenceDuTFAs $FicAli $Access]
            regsub -all {[-\.~]} $SeqGap "" Seq
            if {$Human==""} {
                set Human $Access
                Sauve [SequenceFormatTFA $Seq "$Nom $Human" protbrut] dans "prottfa/$Nom"
            }
            file mkdir copsprottfa/$Nom
            file mkdir copsprottfagap/$Nom
            set Entete "$Access homolog to $Human $Nom"
            Sauve [SequenceFormatTFA $Seq    $Entete protbrut] dans "copsprottfa/$Nom/$Access"
            Sauve [SequenceFormatTFA $SeqGap $Entete protbrut] dans "copsprottfagap/$Nom/$Access"
        }
    }
    CreeBornesDesPABsTroisGradins 1 $Numero $PF "" "" "2.2d" "."
    OnRevientDuBoulot
    return CreateProjectBBSC
}

#rR Below are the tools needed to process Claudine's BLAST results: BlastAli, BlastIndel, BlastStat

proc BlastAliStatPourTous {} {
    #rR Runs BlastStat on every PAB and returns the list of its results.
    #rR (The BlastAli call is currently disabled.)
    set Resultats [list]
    foreach Pab [ListeDesPABs] {
        #lappend Resultats [BlastAli  $Pab]
        lappend Resultats [BlastStat $Pab]
    }
    return $Resultats
}

proc OrganiseLesToposDeClaudine {} {
    #rR Copies, for every PAB, the file <ID>_Arch/<ID>_Arch.blast (relative to the
    #rR genome directory) to the PAB's blastOri file.
    #rR Returns the list of the blastOri files written.
    set LaSortie {}

    # Disabled step: it filled the ID, Alias, AC and OX infos from the protembl files.
    if {0} {
        foreach Nom [ListeDesPABs] {
            set Embl [ContenuDuFichier [GscopeFile $Nom protembl]]
            if { ! [regexp {ID   ([^ ]+)( |$)}      $Embl Match ID]} { FaireLire "pas de ID dans $Nom" }
            if { ! [regexp {\nAC   ([^ ;]+)(;| |$)} $Embl Match AC]} { FaireLire "pas de AC dans $Nom" }
            if { ! [regexp {\nOX   ([^ ;]+)(;| |$)} $Embl Match OX]} { FaireLire "pas de OX dans $Nom" }
            regsub -all {[^0-9]} $OX "" OX
            InformeSansDemander $Nom "=ID: $ID"
            InformeSansDemander $Nom "=Alias: $ID"
            InformeSansDemander $Nom "=AC: $AC"
            InformeSansDemander $Nom "=OX: $OX"
            lappend LaSortie "$Nom $ID $AC $OX"
        }
        return $LaSortie
    }

    # Make sure the three working directories exist
    foreach SousRep [list blastOri tfaFromBOri memo] {
        file mkdir "[RepertoireDuGenome]/$SousRep"
    }

    NousAllonsAuBoulot [RepertoireDuGenome]
    foreach Nom [ListeDesPABs] {
        set ID [ExtraitInfo $Nom "ID:"]
        set Origine "${ID}_Arch/${ID}_Arch.blast"
        Espionne $Origine
        set Destination [GscopeFile $Nom "blastOri"]
        file copy -force $Origine $Destination
        lappend LaSortie $Destination
    }
    OnRevientDuBoulot
    return $LaSortie
}
proc LesOsDesAcDeClaudine {} {
    #rR Reads the accessions of TOPO6B/LesAccessAvecCaca.txt, asks GeneQuid for
    #rR their QY and OS lines, and writes one "<query> <organism>" line per
    #rR query into TOPO6B/QyAc.txt. Returns what SauveLesLignes returns.
    #rR
    #rR Fixes compared to the previous version:
    #rR   - the last record of the GeneQuid answer is now written too
    #rR     (it used to be dropped because records were only flushed when the
    #rR      next QY line was met)
    #rR   - when a record has no OS, the fallback lookup is done for that record
    #rR     (OldQY) and no longer for the following one
    #rR   - an empty input file no longer raises "no such variable"
    set LesBonsAccess {}
    foreach Ligne [LesLignesDuFichier "/genomics/link/Topos/TOPO6B/LesAccessAvecCaca.txt"] {
        # Remove the leading '>' and the last _suffix
        regsub {>} $Ligne "" Ligne
        regsub {_[^_]+$} $Ligne "" Ligne
        lappend LesBonsAccess $Ligne
    }
    set Retour [GeneQuid UniprotData [join $LesBonsAccess ","] "+QY,+OS"]

    # A sentinel QY line with an empty access is appended so that the last
    # real record is flushed by the same code as all the others.
    set LesLignes [split $Retour "\n"]
    lappend LesLignes "QY   "

    set LeOsCourant {}
    set OldQY ""
    set LaSortie {}
    foreach Ligne $LesLignes {
        if {[regexp {QY   (.*)$} $Ligne Match Access]} {
            if {$OldQY ne ""} {
                set OsCourant [join $LeOsCourant " "]
                if {$OsCourant eq ""} {
                    # No OS came with this query: retry with the access cut at its first '_'
                    regsub {_.+$} $OldQY "" Mieux
                    set OS [join [GeneQuid UniprotData $Mieux OS] " "]
                    if {$OS ne ""} {
                        set OsCourant $OS
                    } else {
                        Espionne "Rien pour $OldQY"
                    }
                }
                lappend LaSortie "$OldQY $OsCourant"
                set LeOsCourant {}
            }
            set OldQY $Access
        }
        if {[regexp {OS   (.*)$} $Ligne Match OS]} {
            lappend LeOsCourant [string trim $OS]
        }
    }
    set Fichier [SauveLesLignes $LaSortie dans "/genomics/link/Topos/TOPO6B/QyAc.txt"]
    return $Fichier
}

proc BlastStat {{FichierBlast ""} {CouOut ""} {PilOut ""}} {
    #rR Produces two CSV files from a BLAST output (loaded through FromBlastIndel):
    #rR   CouOut : one line per hit  Access;LE;Coverage;PC;PN;Zones;OS;DE
    #rR            where Coverage is the summed length of the [A-Z]+ zones of the
    #rR            hit's SS string and PC is Coverage as a percentage of LE
    #rR   PilOut : one line per query position  I;T  where T is the number of
    #rR            HSPs covering position I
    #rR FichierBlast may be a PAB name: the blastOri file is then used and the
    #rR outputs go to the Couvert and Piliers directories of the genome.
    #rR Returns the list {CouOut PilOut}.
    #rR
    #rR Fixes compared to the previous version:
    #rR   - the coverage really is the sum of all zones (the length of the last
    #rR     zone only was used, and was undefined for a hit without any zone)
    #rR   - no division by zero when LE is 0 or when no position is covered
    #rR   - the output lists exist even when there is no hit or no query position

    if {[EstUnPAB $FichierBlast]} {
        set Nom $FichierBlast
        set FichierBlast [GscopeFile $Nom "blastOri"]
        set RepCouvert "[RepertoireDuGenome]/Couvert"
        set RepPiliers "[RepertoireDuGenome]/Piliers"
        file mkdir $RepCouvert
        file mkdir $RepPiliers
        set Alias [ExtraitInfo $Nom "Alias" ]
        if {$CouOut==""} { set CouOut "$RepCouvert/${Nom}-$Alias.csv" }
        if {$PilOut==""} { set PilOut "$RepPiliers/${Nom}-$Alias.csv" }
    } else {
        if {$CouOut==""} { set CouOut $FichierBlast.Couvert.csv  }
        if {$PilOut==""} { set PilOut $FichierBlast.Piliers.csv  }
    }

    # Passing extra arguments forces FromBlastIndel to (re)load this blast file
    FromBlastIndel "" "" $FichierBlast

    set QL [FromBlastIndel Query LE]
    foreach I [NombresEntre 1 $QL] {
        set TaillePilier($I) 0
    }

    set LesCouvert {}
    foreach Access [FromBlastIndel ListOf AC] {
        set LE [FromBlastIndel $Access LE]
        set SS [FromBlastIndel $Access SS]
        set PN [FromBlastIndel $Access PN]
        set DE [FromBlastIndel $Access DE] ; regsub -all ";" $DE "," DE
        set OS [FromBlastIndel $Access OS]
        set LH [FromBlastIndel $Access LH]

        # Zones are the runs of uppercase letters of SS, as {first last} index pairs
        set LesZones [regexp -all -indices -inline {[A-Z]+} $SS]
        set Couverture 0
        foreach Zone $LesZones {
            lassign $Zone D F
            incr Couverture [expr {$F-$D+1}]
        }
        set PC 0
        if {$LE > 0} { set PC [expr {round(100.*$Couverture/$LE)}] }
        set LaLigne [list $Access $LE $Couverture $PC $PN $LesZones $OS $DE]

        # Every HSP adds one to each query position it spans
        foreach H $LH {
            lassign $H AccessLu nCops nCopsDansCetteSequence OSLu Score Expect DebutQuery FinQuery
            foreach I [NombresEntre $DebutQuery $FinQuery] {
                incr TaillePilier($I)
            }
        }
        lappend LesCouvert [join $LaLigne ";"]
    }

    set Max 0
    foreach I [NombresEntre 1 $QL] {
        set Max [Maxi $Max $TaillePilier($I)]
    }

    set LesPiliers {}
    foreach I [NombresEntre 1 $QL] {
        set T $TaillePilier($I)
        # Bar length for the console histogram: 1..201 stars, 1 when nothing is covered
        set L [expr {$Max > 0 ? (200*$T)/$Max + 1 : 1}]
        set Ligne [string repeat "*" $L]
        Espionne [format %4d $I] $Ligne
        lappend LesPiliers "$I;$T"
    }
    if {$CouOut!=""} { SauveLesLignes $LesCouvert dans $CouOut }
    if {$PilOut!=""} { SauveLesLignes $LesPiliers dans $PilOut }
    return [list $CouOut $PilOut]
}

proc TBA {} {
    #rR Test call: runs BlastAli on blast_Ark_Topo6B.txt restricted to a fixed list of accessions.
    set LesAC [list \
        WP_011223034.1 RLE46311.1     WP_048156073.1 WP_048120871.1 \
        WP_116481489.1 WP_015054741.1 PKL53186.1     KXS40906.1     \
        WP_011307748.1 WP_048117745.1 ABK14390.1     WP_048124872.1 \
        WP_011021597.1 WP_048039201.1 WP_048038741.1 WP_048171932.1 \
        WP_048182357.1 WP_048107223.1 WP_048195132.1 WP_048205782.1 \
        WP_095645014.1 WP_048137421.1]
    set Resultat [BlastAli blast_Ark_Topo6B.txt $LesAC]
    return $Resultat
}

proc BlastAliComprime {Fichier} {
    #rR Compresses an aligned TFA file: a column that holds a residue for one
    #rR single sequence is removed whenever that sequence has a gap further to
    #rR the right; its residues between the column and that gap are shifted one
    #rR position to the right to fill the gap.
    #rR The result is written next to Fichier as <name>_new.tfa, whose path is returned.
    #rR
    #rR Fixes compared to the previous version:
    #rR   - the column receiving the shifted residue now also records which
    #rR     sequence occupies it (a previously empty column raised
    #rR     "no such element in array" when it was examined later)
    #rR   - the .tfa extension is matched literally (the dot was a wildcard)
    #rR   - the disabled debug block and its second read of every sequence are gone

    # T(i,j) is the character of sequence i at column j
    set I -1
    set LesAccess [LaSequenceDuTFAs $Fichier "LaListeDesAccess"]
    foreach Access $LesAccess {
        incr I
        set Tfa [LaSequenceDuTFAs $Fichier $Access]
        set Seq [QueLaSequenceDuTexteTFA $Tfa]
        set J -1
        foreach C [split $Seq ""] {
            incr J
            set T($I,$J) $C
            if {$C ne "-"} { lappend LesIduNonVide($J) $I; incr TaillePilier($J) }
        }
    }
    # NOTE(review): the last column index is taken from the last sequence read;
    # this assumes all sequences have the same length - confirm.
    set DernierJ $J

    foreach J [NombresEntre 0 $DernierJ] {
        # Only columns occupied by exactly one sequence are candidates
        if { ! [info exists TaillePilier($J)] || $TaillePilier($J)>1} { continue }
        set I [lindex $LesIduNonVide($J) 0]
        set K $J
        set KVide ""
        set Comprime 0
        while {$K<$DernierJ} {
            incr K
            if {$T($I,$K) eq "-"} {
                set KVide $K
                # Shift the residues J..KVide-1 of sequence I one column to the right.
                # The target name is substituted before K is decremented, so each
                # step copies column K-1 into column K.
                while {$K > $J} {
                    set T($I,$K) $T($I,[incr K -1])
                    Espionne T($I,$K) $T($I,$K)
                }
                set  T($I,$J) "-"
                set Comprime 1
                break
            }
        }
        if {$Comprime} {
            # Column KVide gained a residue from sequence I, column J is now empty
            incr TaillePilier($KVide)
            lappend LesIduNonVide($KVide) $I
            set JComprime($J) $I
        }
    }

    set LeTFAs {}
    set I -1
    foreach Access $LesAccess {
        incr I
        set LaSeq {}
        foreach J [NombresEntre 0 $DernierJ] {
            if {[info exists JComprime($J)]} { continue }
            lappend LaSeq $T($I,$J)
        }
        set Seq [join $LaSeq ""]
        lappend LeTFAs [SequenceFormatTFA $Seq $Access "protbrut"]
    }
    set New $Fichier
    regsub {\.tfa$} $New "" New
    append New "_new.tfa"
    SauveLesLignes $LeTFAs dans $New
    return $New
}

proc BlastAli {Fichier {Nieme ""} {SeuilExpect ""} {MaxListe ""}} {
    #rR Builds a multiple alignment in TFA form (one ">access" line followed by one
    #rR sequence line) from the pairwise alignments of a BLAST output: the query
    #rR comes first, then every hit of BlastIndel(ListOf,AC).
    #rR   Fichier     : a BLAST file or a PAB name (then blastOri, memo and
    #rR                 tfaFromBOri of that PAB are used)
    #rR   Nieme       : passed to BlastIndel; if it contains ShowStats or
    #rR                 DisplayStats only the per-position gap statistics are produced
    #rR   SeuilExpect, MaxListe : passed to BlastIndel
    #rR The BlastIndel array is cached in the Memo file and reloaded from it when it exists.
    #rR Returns what SauveLesLignes returns for the TFA file, or "FinShowStats",
    #rR or the result of AfficheListe in the stats modes.
    global BlastIndel

    set DisplayStats 0
    set ShowStats 0
    if {[regexp -nocase "ShowStats|DisplayStats" $Nieme]} {
	if {[regexp -nocase "DisplayStats" $Nieme]} { set DisplayStats 1 }
	set ShowStats 1
	set Nieme ""
    }
    if {[EstUnPAB $Fichier]} {
	set Nom $Fichier
	set Fichier    [GscopeFile $Nom "blastOri"]
	set Memo       [GscopeFile $Nom "memo"]
	set FichierTfa [GscopeFile $Nom "tfaFromBOri"]
    } else { 
	set Memo "${Fichier}_[join $Nieme _].memo"
	set FichierTfa $Fichier
	# NOTE(review): the dot of {.txt$} is a regexp wildcard, not a literal dot
	regsub {.txt$} $FichierTfa "" FichierTfa
	append FichierTfa .tfa
    }

    # Use the cached array when the memo file exists, otherwise parse the blast and save it
    if {[file exists $Memo]} {
	array set BlastIndel [ContenuDuFichier $Memo]
    } else {
	BlastIndel $Fichier $Nieme $SeuilExpect $MaxListe
	Sauve [array get BlastIndel] dans $Memo
    }
    # Creates the GapApres array so that 'array names GapApres' below always works
    set GapApres(trululu) ""
    # Debug leftover: the pattern below is not expected to match any key
    foreach K [array names BlastIndel "wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww"] {
	Espionne BlastIndel($K)=$BlastIndel($K)=
    }
    #exit
    # Debug: accession to trace; the trailing ==== keeps it from matching any real access
    set AVoir "WP_049995565.1==================="
    # Pass 1: walk through every HSP column by column.
    #   IQ, IS : number of query / subject residues consumed so far
    #   GapApres(Q,AC,IQ) : number of subject residues inserted after query position IQ
    #   GapApres(S,AC,IS) : number of gap columns counted after subject position IS
    #   SousQuery(IQ)       : list of "AC IS" aligned with query residue IQ
    #   SousQueryEtendu(IQ) : list of "AC IS" inserted after query residue IQ
    foreach AC $BlastIndel(ListOf,AC) {
	set MontreAC 0
	if {$AC==$AVoir} { set MontreAC 1 }
	#Espionne
	#Espionne $BlastIndel($AC,AC)  $BlastIndel($AC,OS) $BlastIndel($AC,LE) $BlastIndel($AC,DE)
	set IemeH 0
	foreach H $BlastIndel($AC,LH) {
	    incr IemeH	    
	    lassign $H Access nCops nCopsDansCetteSequence OS Score Expect DebutQuery FinQuery DebutSbjct FinSbjct SeqQuery SeqConse SeqSbjct
	    set IQ [expr $DebutQuery-1]
	    set IS [expr $DebutSbjct-1]
	    set J  [expr $IQ-1]
	    if {$MontreAC} {
		Espionne AC $AC
		EspionneL $H
		Espionne IQ=$IQ IS=$IS
		Espionne SeqQuery
		Espionne =$SeqQuery=
		Espionne SeqSbjct
		Espionne =$SeqSbjct=
	    }
	    foreach AQ [split $SeqQuery ""]  AS [split $SeqSbjct ""] {
		incr J
		# Gap on both sides
		if {$AQ=="-" && $AS=="-"} {
		    set GapAt(Query,$J) 1
		    set GapAt($Access,$J) 1
		    #incr GapApres(Q,$AC,$IQ)
		    incr GapApres(S,$AC,$IS)
		    continue
		}
		# Gap in the query: the subject has an inserted residue
		if {$AQ=="-" && $AS!="-"} {
		    set GapAt(Query,$J) 1
		    incr IS
		    incr GapApres(Q,$AC,$IQ)
		    lappend SousQueryEtendu($IQ) "$AC $IS"
		    #Espionne SousQueryEtendu($IQ) "$AC $IS $GapApres(Q,$AC,$IQ)"
		    continue
		}
		# Gap in the subject
		if {$AQ!="-" && $AS=="-"} {
		    set GapAt($Access,$J) 1
		    incr IQ
		    incr GapApres(S,$AC,$IS)
		    continue
		}
		# Residue on both sides
		if {$AQ!="-" && $AS!="-"} {
		    incr IQ
		    incr IS
		    lappend SousQuery($IQ) "$AC $IS"
		    continue
		}
	    }	    
	}
    }
    # NOTE(review): debug leftover that looks broken - 'array names' is given
    # "GapAt(*...*)" as the array name rather than GapAt plus a pattern, and the
    # body reads $K although the loop variable is Key. Confirm before relying on it.
    foreach Key [array names GapAt(*$AVoir*)] {
	Espionne GapAt($K) $GapAt($K)
    }
    #exit
    # Pass 2: build the gapped query. After each query position IQ, as many '-'
    # are inserted as the largest insertion seen there among all hits.
    set LengthWithGap $BlastIndel(Query,LE)
    set SeqGap ""
    set Seq $BlastIndel(Query,SQ)
    # NOTE(review): IQ runs over 0..LE-1 and the residue appended is the one at
    # string index IQ-1, so the residue at index LE-1 is never appended while
    # LengthWithGap still counts it - possible off-by-one, TODO confirm.
    foreach IQ [NombresEntre 0 [expr $BlastIndel(Query,LE)-1]] {
	set MaxGap 0
	foreach K [array names GapApres -glob "Q,*,$IQ"] {
	    set MaxGap [Maxi $MaxGap $GapApres($K)]
	    #Espionne $IQ GapApres($K) $GapApres($K) $MaxGap
	}
	if {$ShowStats} {
	    set Message "[format {%4d %3d} $IQ $MaxGap] [string repeat * $MaxGap]"
	    lappend LesStats $Message
	    Espionne $Message
	}
	incr LengthWithGap $MaxGap
	append SeqGap [string index $Seq $IQ-1]
	append SeqGap [string repeat "-" $MaxGap]
    }
    # NOTE(review): Nom is only set when Fichier was a PAB name; DisplayStats on
    # a plain file would fail on $Nom here.
    if {$DisplayStats} { return [AfficheListe $LesStats "" "Couvert/$Nom"] }
    if {$ShowStats} { return "FinShowStats" }
    #parray SousQuery
    #parray GapApres
    #Espionne $LengthWithGap
    # Pass 3: place the residues. J is the column in the gapped alignment and IQ
    # the number of query residues seen so far; Ali(AC,J) receives the character
    # of BlastIndel(AC,SS) for every hit aligned at or inserted after IQ.
    set IQ 0
    set J 0
    foreach Q [split $SeqGap ""] {
	set Ali(Query,$J) $Q
	if {$Q!="-"} { incr IQ }
	if {[info exists SousQuery($IQ)]} { 
	    foreach ACPos $SousQuery($IQ) {
		scan $ACPos "%s %s" AC Pos
		set Ali($AC,$J) [string index $BlastIndel($AC,SS) $Pos-1]	    
	    }
	    # Consumed once, so the following gap columns do not place it again
	    unset SousQuery($IQ)
	}
	if {[info exists SousQueryEtendu($IQ)]} {
	    if {[info exists iDe]} { unset iDe }
	    # iDe(AC) counts the insertions of AC already placed after column J
	    foreach ACPos $SousQueryEtendu($IQ) {
		scan $ACPos "%s %s" AC Pos
		incr iDe($AC)
		set Ali($AC,[expr $J+$iDe($AC)]) [string index $BlastIndel($AC,SS) $Pos-1]
	    }
	    unset SousQueryEtendu($IQ)
	}
	incr J
    }

    # Pass 4: write one line per sequence. A column without a residue becomes
    # '-' when the previous column was '-' or when a gap was recorded in GapAt,
    # and '.' otherwise; '.' is turned into '-' and '?' into 'X' at the end.
    set LesLignes {}
    foreach AC [concat [list Query] $BlastIndel(ListOf,AC)] {
	set MontreAC 0
	set Previous ""
	set Ligne ""
	set IS 0
	foreach J [NombresEntre 0 [expr $LengthWithGap-1]] {
	    if {$MontreAC} { Espionne $AC $J info exists Ali($AC,$J) [info exists Ali($AC,$J)] [info exists GapAt($AC,$J)] }
	    if {[info exists Ali($AC,$J)]} {
		append Ligne $Ali($AC,$J)
		set Previous $Ali($AC,$J)
		incr IS
	    } elseif {$Previous=="-"} {
		#Espionne 2 $J -
		append Ligne "-"
		set Previous "-"
	    } elseif {[info exists GapAt($AC,$J)]} {
		#Espionne 3 $J -
		append Ligne "-"
		set Previous "-"
	    } else {		    
		#Espionne 4 $J .
		append Ligne .
		set Previous .
	    }
	}
	#Espionne [string range $Ligne 0 250]
	lappend LesLignes ">$AC"
	regsub -all {\.} $Ligne "-" Ligne
	regsub -all {\?} $Ligne "X" Ligne
	lappend LesLignes $Ligne
    }
    return [SauveLesLignes $LesLignes dans $FichierTfa]
}

proc FromBlastIndel {{Qui ""} {Quoi ""} args} {
    #rR Accessor for the global BlastIndel array: returns BlastIndel(Qui,Quoi),
    #rR or the empty string when that entry does not exist.
    #rR Warning: when extra arguments are given, everything is dropped and
    #rR reloaded by calling BlastIndel with those arguments.
    global BlastIndel

    if {[llength $args] > 0} {
        unset -nocomplain BlastIndel
        BlastIndel {*}$args
        return [FromBlastIndel $Qui $Quoi]
    }

    set Cle "$Qui,$Quoi"
    if {[info exists BlastIndel($Cle)]} {
        return $BlastIndel($Cle)
    }
    if { ! [info exists BlastIndel("EstCharge")]} {
        # First miss: set the marker, then ask again (which ends in the branch below)
        set BlastIndel("EstCharge") 1
        return [FromBlastIndel $Qui $Quoi]
    }
    return ""
}

proc BlastIndel {Fichier {Nieme ""} {SeuilExpect ""} {MaxListe ""} {Qui ""} {Quoi ""}} {
    #rR Parses a BLAST output and (re)fills the global array BlastIndel:
    #rR   BlastIndel(ListOf,AC)   list of the accessions kept
    #rR   BlastIndel(<ac>,AC|PN|DE|OS|SS|LE)  values of one hit
    #rR   BlastIndel(<ac>,LH)     list of its HSPs, each one being
    #rR       {Access nCops nCopsDansCetteSequence OS Score Expect
    #rR        DebutQuery FinQuery DebutSbjct FinSbjct SeqQuery SeqConse SeqSbjct}
    #rR       trimmed of the columns already covered by a previous HSP of the hit
    #rR   BlastIndel(Query,LE|SQ) length and sequence of the query
    #rR Nieme selects the hits: a number keeps only the Nth hit, a list of
    #rR accessions keeps only those; empty keeps all.
    #rR Returns BlastIndel(Qui,Quoi) when Qui is given, else the string BlastIndel.
    #rR
    #rR Fixes compared to the previous version:
    #rR   - the subject positions of an HSP are now really remembered (a typo,
    #rR     DejaVuSbjctLOcalEn, made the subject-side overlap test inoperative)
    #rR   - BlastIndel(ListOf,AC) exists (empty) even when no hit is kept
    #rR   - the list of kept HSPs that was accumulated over all hits, sorted, and
    #rR     never used is gone (it was undefined, hence an error, when the first
    #rR     hit had no HSP). BlastIndel(<ac>,LH) stays in LectureSegAli order.
    global BlastIndel

    if {[info exists BlastIndel]} { unset BlastIndel }         ; #rR 2019/01/21
    set BlastIndel(ListOf,AC) {}

    set NiemeEstUnNombre [regexp {^[0-9]+$} $Nieme]
    set NiemeEstUnAccess [regexp -nocase {[a-z]} $Nieme]
    if {$NiemeEstUnNombre} { set MaxListe $Nieme }
    set NbHits [DecortiqueBlast $Fichier $SeuilExpect $MaxListe Query lBanqueId lAccess lDE lProfil lPN lPartieSegAli]
    set Tout [ContenuDuFichier $Fichier]
    # The query length is written either as "Length=nnn" or as "(nnn letters)"
    set QueryLength [IntegerApres "Length=" dans $Tout]
    if {$QueryLength=="" &&  ! [regexp {\(([0-9]+) letters\)} $Tout Match QueryLength]} {
        FaireLire "I cannot find the QueryLength"
    }
    Espionne $QueryLength
    # The query sequence starts as all '?' and is handed to LectureSegAli, which returns it updated
    set SQ [string repeat "?" $QueryLength]

    set N 0
    foreach BId $lBanqueId PN $lPN SegAli $lPartieSegAli {
        incr N
        if {$NiemeEstUnNombre &&   $N!=$Nieme} { continue }
        if {$NiemeEstUnAccess && [lsearch $Nieme $BId]<0} { continue }

        # Header of the hit: everything before the first " Score = " line
        set Access ""
        set Definition ""
        set OS "Unknown unknown"
        set Entete [join $SegAli " "]
        regsub {\n Score = .+$} $Entete "" Entete
        regsub -all { +} $Entete " " Entete
        set Entete [string trim $Entete]

        # Three header layouts are tried, from the most to the least informative
        if {[regexp {>([^ ]+) ([^\[]+)\[([^\]]+)\]} $Entete Match Access Definition OS]} {
            # >access definition [organism]
        } elseif {[regexp {>([^ ]+) RecName: Full=(.+) AltName:} $Entete Match Access Definition]} {
            # >access RecName: Full=definition AltName: ...
        } elseif {[regexp {>([^ ]+) (.+)$} [lindex $SegAli 0] Match Access Definition]} {
            # >access definition   (first line only)
        } else {
            Espionne $Entete; FaireLire "Ca merde pour $BId"; continue
        }
        set Access     [string trim $Access]
        set Definition [string trim $Definition]
        set OS         [string trim $OS]
        set Length -1
        set SS ""
        lappend SegAli "FinSegAli" "FinSegAli" "FinSegAli"
        lassign [LectureSegAli $Access $SQ $OS $SegAli] SQ SS Length ListeDesHomologues

        # DejaVu*En: query / subject positions already covered by a previous HSP of this hit
        if {[info exists DejaVuQueryEn]} { unset DejaVuQueryEn }
        if {[info exists DejaVuSbjctEn]} { unset DejaVuSbjctEn }
        foreach H $ListeDesHomologues {
            lassign $H Access nCops nCopsDansCetteSequence OS Score Expect DebutQuery FinQuery DebutSbjct FinSbjct SeqQuery SeqConse SeqSbjct
            set SeqQueryGarde ""
            set SeqConseGarde ""
            set SeqSbjctGarde ""
            set iQueryCourant [expr {$DebutQuery-1}]
            set iSbjctCourant [expr {$DebutSbjct-1}]
            if {[info exists DebutQueryGarde]} { unset DebutQueryGarde }
            if {[info exists DebutSbjctGarde]} { unset DebutSbjctGarde }
            set OnGarde 0
            if {[info exists DejaVuQueryLocalEn]} { unset DejaVuQueryLocalEn }
            if {[info exists DejaVuSbjctLocalEn]} { unset DejaVuSbjctLocalEn }
            foreach RQ [split $SeqQuery ""] RC [split $SeqConse ""] RS [split $SeqSbjct ""] {
                if {$RQ!="-"} {
                    incr iQueryCourant
                    if { ! [info exists DebutQueryGarde]} {
                        set DebutQueryGarde $iQueryCourant
                        set OnGarde 1
                    }
                    set FinQueryGarde $iQueryCourant
                }
                if {$RS!="-"} {
                    incr iSbjctCourant
                    if { ! [info exists DebutSbjctGarde]} {
                        set DebutSbjctGarde $iSbjctCourant
                        set OnGarde 1
                    }
                    set FinSbjctGarde $iSbjctCourant
                }
                # Skip the columns whose query or subject position belongs to a previous HSP
                if {[info exists DejaVuQueryEn($iQueryCourant)] || [info exists DejaVuSbjctEn($iSbjctCourant)]} {
                    continue
                }
                if {$OnGarde} {
                    append SeqQueryGarde $RQ
                    append SeqConseGarde $RC
                    append SeqSbjctGarde $RS
                }
                set DejaVuQueryLocalEn($iQueryCourant) 1
                set DejaVuSbjctLocalEn($iSbjctCourant) 1
                set FinQueryGarde $iQueryCourant
                set FinSbjctGarde $iSbjctCourant
            }
            set HomologieGarde [list $Access $nCops $nCopsDansCetteSequence $OS $Score $Expect $DebutQueryGarde $FinQueryGarde $DebutSbjctGarde $FinSbjctGarde $SeqQueryGarde $SeqConseGarde $SeqSbjctGarde]
            lappend BlastIndel($Access,LH) $HomologieGarde

            # The positions of this HSP only become "already seen" for the following HSPs
            foreach I [array names DejaVuQueryLocalEn] { set DejaVuQueryEn($I) 1 }
            foreach I [array names DejaVuSbjctLocalEn] { set DejaVuSbjctEn($I) 1 }
        }

        lappend BlastIndel(ListOf,AC) $Access
        set BlastIndel($Access,AC) $Access
        set BlastIndel($Access,PN) $PN
        set BlastIndel($Access,DE) $Definition
        set BlastIndel($Access,OS) $OS
        set BlastIndel($Access,SS) $SS
        set BlastIndel($Access,LE) $Length
    }
    set BlastIndel(Query,LE) $QueryLength
    set BlastIndel(Query,SQ) $SQ
    if {$Qui!=""} { return $BlastIndel($Qui,$Quoi) }
    return "BlastIndel"
}


#rR Executed when this file is sourced: if OnTraite says that CilioCarta is being processed, gscope_blome.tcl is loaded too.
if {[OnTraite CilioCarta]} { source /home/ripp/gscope/gscope_blome.tcl }

proc BlastomicsDir {} {
    #rR Root directory of the blastomics web site.
    set Racine "/home/ripp/www/blastomics"
    return $Racine
}

proc BlastomicsDbDir {} {
    #rR Directory of the blastomics databases: the db subdirectory of BlastomicsDir.
    set Rep [BlastomicsDir]
    append Rep "/db"
    return $Rep
}

proc TestBlastomicsSql {Projet Phylum} {
    #rR Test: runs a count query, grouped by organism, on <Projet>_<Phylum>.db,
    #rR displays the result and then exits the interpreter.
    set Morceaux {}
    lappend Morceaux \
        "SELECT COUNT(ln_organism_taxobla.pk_taxobla), specie, taxid, ln_organism_taxobla.pk_organism, clade.nom " \
        "FROM ln_organism_taxobla join taxobla ON taxobla.pk_taxobla =ln_organism_taxobla.pk_taxobla " \
        "JOIN organism ON organism.pk_organism= ln_organism_taxobla.pk_organism " \
        "JOIN ln_clade_organism ON ln_clade_organism.pk_organism = organism.pk_organism " \
        "JOIN clade ON clade.pk_clade=ln_clade_organism.pk_clade " \
        "GROUP BY organism.pk_organism ORDER BY clade.pk_clade"
    set Requete [join $Morceaux " "]
    set Base "[BlastomicsDbDir]/${Projet}_${Phylum}.db"
    EspionneL [SqlExecForDatabase $Base $Requete GetList]
    exit
}


proc BlastomicsCladesClaudineNeSertPlus {} {
    #rR Beware: similar things also exist in gscope_oi ...
    #rR Displays the clades of AHalma_Archaea.db (tax id, name, class), then
    #rR every OX and OS known to OiCode for the Archaea domain.
    set Base "[BlastomicsDbDir]/AHalma_Archaea.db"
    set LesClades [SqlExecForDatabase $Base "select nom from clade order by nom" "GetList"]
    foreach NomClade [OrderedClades $LesClades] {
        set TaxIdClade [Tax $NomClade TaxId]
        set Classe     [TaxClass $TaxIdClade Name]
        Espionne [format "%10d %-30s %s" $TaxIdClade $NomClade $Classe]
    }

    OiDomain Archaea
    foreach OX [OiCode ListOf OX] {
        Espionne "$OX [OiCode $OX OS]"
    }
}



proc CanalSqlTaxobla {{Bdd ""}} {
    #rR Makes sure the current SQL channel is connected to the database Bdd.
    #rR A Bdd that is not an existing file is looked for in BlastomicsDbDir.
    #rR The channel is reconnected only when the database currently connected
    #rR does not have the same file name. Returns [CanalSql].

    if {$Bdd ne "" && [FileAbsent $Bdd]} {
        set Bdd "[BlastomicsDbDir]/$Bdd"
    }
    Espionne $Bdd

    set Queue [file tail $Bdd]
    if {"[CanalSql "GetDbname"].db" eq $Queue} {
        # Already connected to the right database
        return [CanalSql]
    }

    Espionne [CanalSql "GetDbname"]
    Espionne $Queue
    CanalSqlDisconnect
    CanalSql [ConnInfoForDatabase $Bdd]
    return [CanalSql]
}

proc BlastomicsFilterTaxobla {{Bdd ""} {ListOfPkOrg ""} {ListOfNot ""} {ListOfCladeCounts ""}} {
    #rR Selects the taxobla entries of the database Bdd:
    #rR   ListOfPkOrg       organisms (pk_organism) that must have a hit; the sets
    #rR                     are intersected, or united when ListOfCladeCounts is
    #rR                     "UseUnion". Empty or "-" means all taxobla.
    #rR   ListOfNot         organisms that must NOT have a hit ("-" for none).
    #rR                     The special value "GetListOfPk" makes the procedure
    #rR                     return the plain list of pk_taxobla at that point.
    #rR   ListOfCladeCounts comma separated  Clade/Min-Max  (or Clade/N) entries:
    #rR                     only the taxobla whose branch id for that clade is
    #rR                     within Min..Max are kept.
    #rR Returns "Pks/GscopeIds/GeneIds/CladeCounts", each part space separated.
    #rR Beware: this procedure calls itself to resolve ListOfNot.
    #rR
    #rR Fixes compared to the previous version:
    #rR   - quotes in the clade name are doubled before it goes into the SQL text
    #rR   - the bounds must be plain integers; an entry without valid bounds
    #rR     (including the "UseUnion" marker) is logged and ignored instead of
    #rR     producing a malformed SQL statement
    #rR The old commented-out variant that selected by pk_clade has been removed.
    CanalSqlTaxobla $Bdd

    set UseUnion [string equal -nocase $ListOfCladeCounts "UseUnion"]

    LogWscope $ListOfPkOrg
    LogWscope $ListOfNot

    if {$ListOfPkOrg=="-"} { set ListOfPkOrg "" }
    if {$ListOfNot  =="-"} { set ListOfNot   "" }

    # Any run of non-digits is a separator, so only digits can reach the SQL below
    if {[regexp {[^0-9]} $ListOfPkOrg]} {
        regsub -all {[^0-9]+} $ListOfPkOrg "," ListOfPkOrg
        set ListOfPkOrg [split [string trim $ListOfPkOrg ","] ","]
    }

    if {$ListOfPkOrg==""} {
        set LesPkTot [SqlExec "select pk_taxobla from taxobla" "GetList"]
    } else {
        set PremiereFois 1
        foreach PkO $ListOfPkOrg {
            set Sql "select pk_taxobla from ln_organism_taxobla where pk_organism==$PkO"
            set LesPkT [SqlExec $Sql "GetList"]
            if {$PremiereFois} {
                set LesPkTot $LesPkT
                set PremiereFois 0
            } elseif {$UseUnion} {
                set LesPkTot [ListsUnion $LesPkTot $LesPkT]
            } else {
                set LesPkTot [ListsIntersection $LesPkTot $LesPkT]
            }
        }
    }

    if {$ListOfNot=="GetListOfPk"} { return $LesPkTot }

    if {$ListOfNot!=""} {
        # Everything having a hit in at least one of the Not organisms is removed
        set LesNot [BlastomicsFilterTaxobla $Bdd $ListOfNot "GetListOfPk" "UseUnion"]
        set LesPkTot [ListsComplement $LesPkTot $LesNot]
    }

    #rR We assume that no Absence is checked when a number of species is requested,
    #rR so we can filter on the requested number.
    set LesNbHitsDesClades {}
    foreach CC [split $ListOfCladeCounts ","] {
        LogWscope $CC
        set CountText ""
        lassign [split $CC "/"] Clade CountText
        LogWscope "$Clade $CountText"
        set LesBornes [split $CountText "-"]
        set D [lindex $LesBornes 0]
        set F [lindex $LesBornes end]
        if { ! [regexp {^[0-9]+$} $D] || ! [regexp {^[0-9]+$} $F]} {
            LogWscope "BlastomicsFilterTaxobla ignores the clade count '$CC' : bounds are not integers"
            continue
        }
        # SQL string literal: a quote is escaped by doubling it
        regsub -all {'} $Clade {''} CladeSql
        set LeSql {}
        lappend LeSql "select ln_branche_taxobla.pk_taxobla from ln_branche_taxobla, branche, sapin, clade "
        lappend LeSql " where "
        lappend LeSql "              '$CladeSql'     =              clade.nom "
        lappend LeSql "   and   clade.pk_clade    =              sapin.pk_clade "
        lappend LeSql "   and   sapin.pk_sapin    =            branche.pk_sapin "
        lappend LeSql "   and branche.pk_branche  = ln_branche_taxobla.pk_branche "
        lappend LeSql "   and branche.id_branche >= $D and branche.id_branche <= $F"
        set Sql [join $LeSql " "]
        regsub -all { +} $Sql " " Sql
        LogWscope $Sql
        set LesBonsNb [SqlExec $Sql "GetList"]
        set Nb [llength $LesBonsNb]
        LogWscope "$Nb trouves"
        lappend LesNbHitsDesClades "$Clade=$D-$F"
        set LesPkTot [ListsIntersection $LesPkTot $LesBonsNb]
    }

    set LesPk {}
    set LesGscopeId {}
    set LesGeneId {}
    set Sql "select pk_taxobla, gscopeid, gene_id from taxobla where pk_taxobla in ([join $LesPkTot ,])"
    LogWscope $Sql
    foreach {Pk GscopeId GeneId} [SqlExec $Sql "GetList"] {
        lappend LesPk       $Pk
        lappend LesGscopeId $GscopeId
        if {$GeneId!=""} { lappend LesGeneId $GeneId }
    }

    set Pks             [join $LesPk " "]
    set Noms            [join $LesGscopeId " "]
    set GeneIds         [join $LesGeneId " "]
    set NbHitsDesClades [join $LesNbHitsDesClades " "]
    set Resultat "$Pks/$Noms/$GeneIds/$NbHitsDesClades"
    return $Resultat
}

proc CladeContentWithinOi2017 {Clade {KindOfClades ""}} {
    #rR Returns the tax ids of Clade for which OiCode knows an OI.
    #rR We keep only those that also exist in Oi2017. BEWARE OF THE DOMAIN ... still to do.
    #rR KindOfClades selects where the content of the clade comes from
    #rR (CMC, JoyFungi, Bac8, Bac9, TTLL, Archaea, anything else: CladeContent).
    #rR When empty it is deduced from the current OiDomain.
    JeMeSignale

    if {$KindOfClades eq ""} {
        if {[OiDomain] eq "Archaea"}   { set KindOfClades "Archaea" }
        if {[OiDomain] eq "Eukaryota"} { set KindOfClades "CMC" }         ; #rR to be corrected later   !!!!!!!!!!!
    }
    Espionne "OiDomain =[OiDomain]= KindOfClades =$KindOfClades="

    switch -exact -- $KindOfClades {
        "CMC" {
            set Domaine "Eukaryota"
            set LesTaxId [CMC $Clade OX]
        }
        "JoyFungi" {
            set Domaine "Eukaryota"
            set LesTaxId [Paraph $KindOfClades $Clade OX]
        }
        "Bac8" -
        "Bac9" {
            set Domaine "Bacteria"
            set LesTaxId [Paraph $KindOfClades $Clade OX]
        }
        "TTLL" {
            set Domaine "Eukaryota"
            set LesTaxId [TTLL $Clade OX]
        }
        "Archaea" {
            set Domaine "Archaea"
            set LesTaxId [PhylAr $Clade ListOfOX]
        }
        default {
            set Domaine "Eukaryota"
            set LesTaxId [CladeContent $Clade]
        }
    }

    set LesBons {}
    foreach TaxId $LesTaxId {
        #if {[OiCodeForDomain $Domaine $TaxId OI]==""} { continue }
        if {[OiCode $TaxId OI] ne ""} { lappend LesBons $TaxId }
    }
    return $LesBons
}


proc BlastomicsCreateDb {{Project ""} {KindOfClades ""} {GetWhat ""}} {
    #rR Creates the empty blastomics database <Project>_<KindOfClades>.db in
    #rR BlastomicsDbDir, with the tables clade, organism, taxobla, branche, sapin
    #rR and the link tables ln_clade_organism, ln_organism_taxobla, ln_branche_taxobla.
    #rR   Project      : defaults to the name of the current genome directory
    #rR   KindOfClades : second part of the file name
    #rR                  NOTE(review): no default, an empty value gives "<Project>_.db"
    #rR   GetWhat      : "GetHandleAndFilename" returns {handle filename} and
    #rR                  "GetHandle" returns the handle, both leaving the
    #rR                  connection open; otherwise the connection is closed and
    #rR                  the file name is returned.
    #rR Returns "Error: Already exists" when the file exists and the user declines to overwrite it.

    if {$Project==""} { set Project [file tail [RepertoireDuGenome]] }

    set Bdd "[BlastomicsDbDir]/${Project}_${KindOfClades}.db"
    
    # An existing database is only replaced after confirmation
    if {[FileExists $Bdd]} {
        if {[OuiOuNon "$Bdd already exists. Do I overwrite ?"]} {
            # presumably Garde keeps a backup copy before the file is deleted - verify
            Garde $Bdd
	    file delete $Bdd
	} else {
            return "Error: Already exists"
	}
    }
    Espionne [CanalSql [ConnInfoForDatabase $Bdd "CreateIfNotExists"]]

    # Everything from here to the COMMIT is sent between BEGIN TRANSACTION and COMMIT
    SqlExec "BEGIN TRANSACTION"

    SqlExec "DROP TABLE IF EXISTS ln_clade_organism"
    SqlExec "DROP TABLE IF EXISTS ln_organism_taxobla"
    SqlExec "DROP TABLE IF EXISTS ln_branche_taxobla"
    SqlExec "DROP TABLE IF EXISTS branche"
    SqlExec "DROP TABLE IF EXISTS sapin"
    SqlExec "DROP TABLE IF EXISTS organism"
    SqlExec "DROP TABLE IF EXISTS taxobla"
    SqlExec "DROP TABLE IF EXISTS clade"

    # Active schema: plain INTEGER pk_ columns, no PRIMARY KEY / FOREIGN KEY constraints
    SqlExec "CREATE TABLE clade   (pk_clade    INTEGER , nom           VARCHAR(15))"
    SqlExec "CREATE TABLE organism(pk_organism INTEGER , taxid         VARCHAR(10), 
                                                         specie        VARCHAR(25))"
    SqlExec "CREATE TABLE taxobla (pk_taxobla  INTEGER , gscopeid      VARCHAR(10), 
                                                         chemin        VARCHAR(20),
                                                         uniprotaccess VARCHAR(12),
                                                         uniprotid     VARCHAR(15), 
                                                         gene_name     VARCHAR(20), 
                                                         gene_id       VARCHAR(20))"
    SqlExec "CREATE TABLE branche (pk_branche  INTEGER , pk_sapin   INTEGER, 
                                                         id_branche INTEGER, 
                                                         largeur    INTEGER)"

    SqlExec "CREATE TABLE ln_clade_organism   (pk_clade     INTEGER,
                                               pk_organism  INTEGER)"

    SqlExec "CREATE TABLE ln_organism_taxobla (pk_taxobla   INTEGER, 
                                               pk_organism  INTEGER)"

    SqlExec "CREATE TABLE ln_branche_taxobla  (pk_branche   INTEGER, 
                                               pk_taxobla   INTEGER)"

    SqlExec "CREATE TABLE sapin               (pk_sapin     INTEGER, 
                                               pk_clade     INTEGER)"


    # Disabled alternative schema: same tables with AUTOINCREMENT primary keys and foreign keys
    if {0} {
    SqlExec "CREATE TABLE clade   (pk_clade    INTEGER PRIMARY KEY AUTOINCREMENT, nom           VARCHAR(15))"
    SqlExec "CREATE TABLE organism(pk_organism INTEGER PRIMARY KEY AUTOINCREMENT, taxid         VARCHAR(10), 
                                                                                  specie        VARCHAR(25))"
    SqlExec "CREATE TABLE taxobla (pk_taxobla  INTEGER PRIMARY KEY AUTOINCREMENT, gscopeid      VARCHAR(10), 
                                                                                  chemin        VARCHAR(20),
                                                                                  uniprotaccess VARCHAR(12),
                                                                                  uniprotid     VARCHAR(15), 
                                                                                  gene_name     VARCHAR(20), 
                                                                                  gene_id       VARCHAR(20))"
    SqlExec "CREATE TABLE branche (pk_branche  INTEGER PRIMARY KEY AUTOINCREMENT, pk_sapin   INTEGER, 
                                                                                  id_branche INTEGER, 
                                                                                  largeur    INTEGER, 
                                   FOREIGN KEY(pk_sapin) REFERENCES sapin(pk_sapin))"

    SqlExec "CREATE TABLE ln_clade_organism   (pk_clade     INTEGER,
                                               pk_organism  INTEGER, 
                                   PRIMARY KEY(pk_clade, pk_organism), 
                                   FOREIGN KEY(pk_clade)    REFERENCES clade(pk_clade), 
                                   FOREIGN KEY(pk_organism) REFERENCES organism(pk_organism))"
    SqlExec "CREATE TABLE ln_organism_taxobla (pk_taxobla   INTEGER, 
                                               pk_organism  INTEGER, 
                                   PRIMARY KEY(pk_taxobla, pk_organism), 
                                   FOREIGN KEY(pk_organism) REFERENCES organism(pk_organism), 
                                   FOREIGN KEY(pk_taxobla)  REFERENCES taxobla(pk_taxobla))"
    SqlExec "CREATE TABLE ln_branche_taxobla  (pk_branche   INTEGER, 
                                               pk_taxobla   INTEGER, 
                                   PRIMARY KEY(pk_branche, pk_taxobla), 
                                   FOREIGN KEY(pk_branche)  REFERENCES branche(pk_branche),
                                   FOREIGN KEY(pk_taxobla)  REFERENCES taxobla(pk_taxobla))"
    SqlExec "CREATE TABLE sapin               (pk_sapin     INTEGER PRIMARY KEY AUTOINCREMENT, 
                                               pk_clade     INTEGER, 
                                   FOREIGN KEY(pk_clade)    REFERENCES clade(pk_clade)) "
    }
    SqlExec "COMMIT"

    # The connection stays open only when the caller asked for the handle
    if {$GetWhat=="GetHandleAndFilename"} { return [list [CanalSql] $Bdd] }
    if {$GetWhat=="GetHandle"           } { return [CanalSql] }
    CanalSqlDisconnect
    return $Bdd
}

proc BlastomicsNewQuery {{Project ""} {KindOfClades ""}} {
    #rR Builds the tab-separated load files of the blastomics database in
    #rR [RepertoireDuGenome]/blomesql, bulk-loads each of them into the table
    #rR named in its file name, creates the indexes and disconnects.
    #rR   Project, KindOfClades : passed unchanged to BlastomicsCreateDb ;
    #rR                           KindOfClades also selects the clades (BlastomicsClades)
    #rR                           and is passed to CladeContentWithinOi2017.
    #rR Returns the Bdd value given by BlastomicsCreateDb, or the handle value
    #rR itself when it starts with "Error".

    #rR Hard-coded : 1 regenerates every load file, 0 would reload the files already in RepSql
    set Overwrite 1

    set RepSql "[RepertoireDuGenome]/blomesql"
    file mkdir $RepSql

    lassign [BlastomicsCreateDb $Project $KindOfClades "GetHandleAndFilename"] SqlHandle Bdd
    if {[regexp "^Error" $SqlHandle]} { return $SqlHandle }

    set LesFichiersACharger {}

    if {$Overwrite} {
        #rR TaxIdQuery is not used further in this procedure
        set TaxIdQuery [NotreOX]

        set ListOfClades [lsort -unique [BlastomicsClades $KindOfClades]]

        #rR Initialised here so that an empty clade list (or clades without any
        #rR organism) does not raise "no such variable" further down.
        set AllTaxId {}
        foreach Clade $ListOfClades {
            Espionne "Clade $Clade"
            set OrgsFromClade($Clade) [CladeContentWithinOi2017 $Clade $KindOfClades]
            Espionne $OrgsFromClade($Clade)
            set Cardinal($Clade) [llength $OrgsFromClade($Clade)]
            Espionne $Cardinal($Clade)
            foreach TaxId $OrgsFromClade($Clade) {
                lappend AllTaxId $TaxId
                set OsFrom($TaxId) [Tax $TaxId "Name"]
                lappend LesCladesDuTaxId($TaxId) $Clade
            }
        }
        #rR The result is not used below ; the integer sort raises early if a TaxId is not an integer
        set AllTaxId [lsort -unique -integer $AllTaxId]

        #rR Table clade : pk_clade, clade name
        set PkClade 0
        set SqlPourClade {}
        foreach Clade $ListOfClades {
            incr PkClade
            lappend SqlPourClade "$PkClade\t$Clade"
            set PkDuClade($Clade) $PkClade
        }
        lappend LesFichiersACharger [SauveLesLignes $SqlPourClade dans "$RepSql/10_SqlPour-clade.txt"]

        #rR Table organism : pk_organism, taxid, species name
        set PkOrganism 0
        set SqlPourOrganism {}
        foreach TaxId [lsort -integer [array names OsFrom]] {
            set Species $OsFrom($TaxId)
            incr PkOrganism
            lappend SqlPourOrganism "$PkOrganism\t$TaxId\t$Species"
            set PkDuOrganismTaxId($TaxId)     $PkOrganism
            set PkDuOrganismSpecies($Species) $PkOrganism
        }
        lappend LesFichiersACharger [SauveLesLignes $SqlPourOrganism dans "$RepSql/20_SqlPour-organism.txt"]

        #rR Link table clade - organism
        set SqlPourLnCladeOrganism {}
        foreach Clade $ListOfClades {
            foreach TaxId $OrgsFromClade($Clade) {
                set PkOrganism $PkDuOrganismTaxId($TaxId)
                lappend SqlPourLnCladeOrganism "$PkDuClade($Clade)\t$PkOrganism"
            }
        }
        lappend LesFichiersACharger [SauveLesLignes $SqlPourLnCladeOrganism dans "$RepSql/30_SqlPour-ln_clade_organism.txt"]

        #rR Table sapin : here one sapin per clade, with the same primary key as the clade
        set SqlPourSapin {}
        set LesPkSapin {}
        foreach Clade $ListOfClades {
            set PkSapin $PkDuClade($Clade)    ;#rR I do not know whether there can be others ???
            lappend SqlPourSapin "$PkSapin\t$PkDuClade($Clade)"
            lappend LesPkSapin $PkSapin
            lappend LesCladesDuSapin($PkSapin) $Clade
        }
        lappend LesFichiersACharger [SauveLesLignes $SqlPourSapin dans "$RepSql/40_SqlPour-sapin.txt"]

        #rR Table branche : one row per value given by NombresEntre 0 Largeur, where
        #rR Largeur is the number of organisms of the clade.
        #rR NOTE(review): presumably NombresEntre includes both bounds - to be confirmed.
        set PkBranche 0
        set SqlPourBranche {}
        foreach PkSapin $LesPkSapin {
            foreach Clade $LesCladesDuSapin($PkSapin) {
                set Largeur [llength $OrgsFromClade($Clade)]
                foreach IdBranche [NombresEntre 0 $Largeur] {
                    incr PkBranche
                    lappend SqlPourBranche "$PkBranche\t$PkSapin\t$IdBranche\t$Largeur"
                    set PkBrancheDe($PkSapin,$IdBranche) $PkBranche
                }
            }
        }
        lappend LesFichiersACharger [SauveLesLignes $SqlPourBranche dans "$RepSql/50_SqlPour-branche.txt"]

        #rR Table taxobla : one row per PAB with its AC, ID, gene name and GI
        set PkTaxobla 0
        set SqlPourTaxobla {}
        foreach Nom [ListeDesPABs] {
            incr PkTaxobla
            set PkTaxoblaDe($Nom) $PkTaxobla
            set Ac [ExtraitInfo $Nom "AC:"]
            set Id [ExtraitInfo $Nom "ID:"]
            set Gn [ExtraitInfo $Nom "ValiGN:"]
            if {$Gn==""} { set Gn [ExtraitInfo $Nom "GN:"] }
            set Gi [ExtraitInfo $Nom "GI:"]
            if {$Gi==""} { set Gi [NIAG $Nom "G"] }
            lappend SqlPourTaxobla "$PkTaxobla\t$Nom\ttaxobla\t$Ac\t$Id\t$Gn\t$Gi"
        }
        lappend LesFichiersACharger [SauveLesLignes $SqlPourTaxobla dans "$RepSql/60_SqlPour-taxobla.txt"]

        #rR Link table organism - taxobla, and count of the hits per (taxobla, clade)
        set SqlPourLnOrganismTaxobla {}
        foreach Nom [ListeDesPABs] {
            set PkTaxobla $PkTaxoblaDe($Nom)
            #rR Every (taxobla, clade) pair starts at 0, even when the taxobla file is missing
            foreach Clade $ListOfClades {
                set PkClade $PkDuClade($Clade)
                set NbHitsDansClade($PkTaxobla,$PkClade) 0
            }
            set FichierTaxobla [GscopeFile $Nom "taxobla"]
            if {[FileAbsent $FichierTaxobla]} { continue }
            foreach Ligne [LesLignesDuFichier $FichierTaxobla] {
                #rR A line without the 3 leading fields is skipped ; otherwise the values
                #rR of the previous line would silently be used again.
                if {[scan $Ligne "%f %f %d" Pn Score TaxId] != 3} { continue }
                #rR Stops at the first hit above the threshold
                #rR NOTE(review): this assumes the lines are sorted by increasing Pn - to be confirmed.
                if {$Pn>0.001} { break }
                set Score [expr round($Score)]
                if { ! [info exists PkDuOrganismTaxId($TaxId)]} { continue }
                set PkOrganism $PkDuOrganismTaxId($TaxId)
                lappend SqlPourLnOrganismTaxobla "$PkTaxobla\t$PkOrganism"
                foreach Clade $LesCladesDuTaxId($TaxId) {
                    set PkClade $PkDuClade($Clade)
                    incr NbHitsDansClade($PkTaxobla,$PkClade)
                }
            }
        }
        lappend LesFichiersACharger [SauveLesLignes $SqlPourLnOrganismTaxobla dans "$RepSql/70_SqlPour-ln_organism_taxobla.txt"]

        #rR Link table branche - taxobla : the number of hits selects the branche.
        #rR The second part of the key is a PkClade ; it is used as PkSapin because
        #rR both have the same value (see the sapin table above).
        set SqlPourLnBrancheTaxobla {}
        foreach {K NbHits} [array get NbHitsDansClade] {
            lassign [split $K ","] PkTaxobla PkSapin
            if { ! [info exists PkBrancheDe($PkSapin,$NbHits)]} {
                Espionne "not existing PkBrancheDe($PkSapin,$NbHits) PkTaxobla $PkTaxobla"
                continue
            }
            set PkBranche $PkBrancheDe($PkSapin,$NbHits)  ;#rR NbHits = IdBranche
            lappend SqlPourLnBrancheTaxobla "$PkBranche\t$PkTaxobla"
        }
        lappend LesFichiersACharger [SauveLesLignes $SqlPourLnBrancheTaxobla dans "$RepSql/80_SqlPour-ln_branche_taxobla.txt"]
    } else {
        set LesFichiersACharger [lsort [glob "$RepSql/*SqlPour-*"]]
    }

    #rR The table name is the part of the file name between the dash and the first dot
    foreach Fichier $LesFichiersACharger {
        set Queue [file tail $Fichier]
        if { ! [regexp {\-([^\.]+)\.} $Queue Match Table]} { continue }
        $SqlHandle copy abort $Table $Fichier "\t"
        Espionne "$Table done"
    }

    BlastomicsCreateIndex

    CanalSqlDisconnect
    return $Bdd
}

proc BlastomicsCreateIndex {{Bdd ""}} {
    #rR Creates the indexes of the blastomics tables through SqlExec.
    #rR   Bdd : when not empty, a connection is first opened with
    #rR         CanalSql [ConnInfoForDatabase $Bdd] and closed at the end.
    #rR Returns "" when Bdd was given, the result of CanalSql otherwise.
    set ConnexionLocale [expr {$Bdd ne ""}]
    if {$ConnexionLocale} { CanalSql [ConnInfoForDatabase $Bdd] }

    set LesOrdresSql {
        {CREATE INDEX iclade    ON clade     (pk_clade, nom)}
        {CREATE INDEX iorganism ON organism  (pk_organism, taxid, specie)}
        {CREATE INDEX itaxobla  ON taxobla   (pk_taxobla, gscopeid)}
        {CREATE INDEX ibranche  ON branche   (pk_branche)}
        {CREATE INDEX isapin    ON sapin     (pk_sapin, pk_clade)}
        {CREATE INDEX ilnco     ON ln_clade_organism   (pk_clade, pk_organism)}
        {CREATE INDEX ilnot     ON ln_organism_taxobla (pk_taxobla, pk_organism)}
        {CREATE INDEX ilbt      ON ln_branche_taxobla  (pk_branche, pk_taxobla)}
    }
    foreach OrdreSql $LesOrdresSql {
        SqlExec $OrdreSql
    }

    if {$ConnexionLocale} {
        CanalSqlDisconnect
        return ""
    }
    return [CanalSql]
}

proc BlastomicsClades {{Quoi ""} {GetWhat ""}} {
    #rR Returns the clades of the kind Quoi.
    #rR   Quoi    : one of the names listed in Possibles, or any text containing
    #rR             "Possible" (case-insensitive) to get the Possibles list itself.
    #rR   GetWhat : "GetList" returns the clades split on single spaces.
    #rR When Quoi is "Cilio", or when OnTraite "CilioCarta" "Like" is true, the
    #rR CilioCarta clades replace whatever was selected before.
    #rR Is called by the Python Blame ... through question de science ...

    #rR Please keep this list up to date
    set Possibles [lsort [list Archaea Bac8 Bac9 CMC Cilio TTLL JoyFungi]]

    set Clades {}
    if {[regexp -nocase "Possible" $Quoi]} { set Clades $Possibles }

    switch -exact -- $Quoi {
        Archaea  { set Clades [PhylAr ListOf PhyOS] }
        CMC      { set Clades [CMC ListOf JoinedClade] }
        TTLL     { set Clades [TTLL ListOf JoinedClade] }
        JoyFungi -
        Bac8     -
        Bac9     { set Clades [Paraph $Quoi ListOf JoinedClade] }
    }

    #rR Inside the CilioCarta project BilanCilio is called directly, from any
    #rR other project the request goes through QuestionDeScience.
    if {$Quoi=="Cilio" || [OnTraite "CilioCarta" "Like"]} {
        if {[OnTraite "CilioCarta"]} {
            set Clades [BilanCilio ListOf JoinedClade]
        } else {
            set Clades [QuestionDeScience "CilioCarta" "ret BilanCilio ListOf JoinedClade"]
        }
    }

    if {$GetWhat=="GetList"} {
        return [split $Clades " "]
    }
    return $Clades
}


Index by: file name | procedure name | procedure call | annotation
File generated 2022-04-05 at 12:55.