Index by: file name |
procedure name |
procedure call |
annotation
gscope_blastomics.tcl
(annotations | original source)
#rR gscope_blastomics.tcl
proc NicoMapping {{Qui ""} {Quoi ""}} {
    #rR Memoized lookup in a ";"-separated identifier table.
    #rR   NicoMapping ListOf Id|Acc|IdT|IdG    all values read for that column
    #rR   NicoMapping <value> Id|Acc|IdT|IdG   that column of the row holding <value>
    #rR The table is read once, on the first call that misses the cache.
    #rR Returns "" for anything unknown.
    global NicoMapping

    set CsvFile "~scalzitti/These/G3PO_Benchmark/Data/ID_ensembl_new_version.csv"

    if {[info exists NicoMapping($Qui,$Quoi)]} {
        return $NicoMapping($Qui,$Quoi)
    }
    #rR Table already loaded and the key is still unknown
    if {[info exists NicoMapping("EstCharge")]} {
        return ""
    }
    set NicoMapping("EstCharge") 1

    foreach Record [LesLignesDuFichier $CsvFile] {
        lassign [split $Record ";"] Id Acc IdT IdG
        #rR Skip the row whose first field is ID_UNIPROT (presumably the column titles)
        if {$Id eq "ID_UNIPROT"} { continue }
        set Row [list Id $Id Acc $Acc IdT $IdT IdG $IdG]
        #rR Every value of the row gives access to every column of the row
        foreach {FromCol FromVal} $Row {
            lappend NicoMapping(ListOf,$FromCol) $FromVal
            foreach {ToCol ToVal} $Row {
                set NicoMapping($FromVal,$ToCol) $ToVal
            }
        }
    }

    if {[info exists NicoMapping($Qui,$Quoi)]} {
        return $NicoMapping($Qui,$Quoi)
    }
    return ""
}
proc CarExVerif {{Liste ""}} {
    #rR For each Nom of Liste (default: ListeDesPABs) counts, among the ids
    #rR given by "bbsc Nom listof id", how many have a non empty "CarEx Id cds",
    #rR and reports one formatted line per Nom through Espionne.
    #rR Always returns finCarExVerif.
    if {$Liste==""} { set Liste [ListeDesPABs] }
    foreach Nom $Liste {
        set GN [ExtraitInfo $Nom "ValiGN:"]
        set NbNon 0
        set NbOui 0
        foreach Id [bbsc $Nom listof id] {
            if {[CarEx $Id cds] eq ""} {
                incr NbNon
            } else {
                incr NbOui
            }
        }
        set Nb [expr {$NbNon + $NbOui}]
        #rR A Nom without any id is reported as 0% instead of dividing by zero
        set PourCent [expr {$Nb > 0 ? (100*$NbOui)/$Nb : 0}]
        Espionne [format "%s %-12s %4d %s %5d %s %s %5d %3d%s de oui" $Nom $GN $NbNon non $NbOui oui sur $Nb $PourCent "%"]
    }
    return finCarExVerif
}
proc CarEx {{Qui ""} {Quoi ""} {Quid ""}} {
    #rR Memoized access to the exon maps (*.gar) and the genomic sequences (*.fasta).
    #rR Qui is upper-cased, Quoi and Quid are lower-cased before the lookup.
    #rR Call it without argument (or with Help) to get the list of queries.
    #rR Everything is loaded in the global array CarEx at the first call that
    #rR misses the cache. The sequences asked with "exon N", "intron N" and "cds"
    #rR are not stored, they are cut out of the genomic file at each call.
    #rR Returns "" for anything unknown.
    global CarEx
    set Qui  [string toupper $Qui]
    set Quoi [string tolower $Quoi]
    set Quid [string tolower $Quid]
    if {$Qui=="" && $Quoi=="" && $Quid==""} { set Qui "HELP" }
    if {[info exists CarEx($Qui,$Quoi,$Quid)]} { return $CarEx($Qui,$Quoi,$Quid) }
    if {[info exists CarEx("EstCharge")]} {
        #rR The leading n shifts the sequence by one, so index 1 is the first base
        if {$Quoi=="exon" || $Quoi=="intron"} {
            if {[info exists CarEx($Qui,${Quoi}_$Quid,limits)]} {
                set Limits $CarEx($Qui,${Quoi}_$Quid,limits)
                if {[info exists CarEx($Qui,sg,filename)]} {
                    set Seq [QueLaSequenceDuFichierTFA $CarEx($Qui,sg,filename)]
                    return [string range n$Seq {*}$Limits]
                }
            }
        }
        #rR The cds is the concatenation of all exons, in the order of the limits list
        if {$Quoi=="" || $Quoi=="cds"} {
            if {[info exists CarEx($Qui,exon,limits)]} {
                set Limits $CarEx($Qui,exon,limits)
                if {[info exists CarEx($Qui,sg,filename)]} {
                    set Seq [QueLaSequenceDuFichierTFA $CarEx($Qui,sg,filename)]
                    set Cds ""
                    foreach Limit $Limits {
                        lassign $Limit D F
                        append Cds [string range n$Seq $D $F]
                    }
                    return $Cds
                }
            }
        }
        return ""
    }
    set CarEx("EstCharge") 1

    set LeHelp {}
    lappend LeHelp "CarEx Help"
    lappend LeHelp "CarEx listof id"
    lappend LeHelp "CarEx T1JH94_STRMM ce filename ;# (carte exonique)"
    lappend LeHelp "CarEx T1JH94_STRMM sg filename ;# (séquence génomique)"
    lappend LeHelp "CarEx T1JH94_STRMM listof exon ;# 1 2 3 4"
    lappend LeHelp "CarEx T1JH94_STRMM listof intron ;# 1 2 3"
    lappend LeHelp "CarEx T1JH94_STRMM exon limits ;# {1 320} {380 466} {534 665} {776 934}"
    lappend LeHelp "CarEx T1JH94_STRMM exon_3 start ;# 534"
    lappend LeHelp "CarEx T1JH94_STRMM exon_3 end ;# 665"
    lappend LeHelp "CarEx T1JH94_STRMM exon_3 limits ;# 534 665"
    lappend LeHelp "CarEx T1JH94_STRMM exon 3 ;# GATAGTTTAATTCGAAAAAAAT... etc"
    lappend LeHelp "CarEx T1JH94_STRMM intron limits"
    lappend LeHelp "CarEx T1JH94_STRMM intron_2 start"
    lappend LeHelp "CarEx T1JH94_STRMM intron_2 end"
    lappend LeHelp "CarEx T1JH94_STRMM intron_2 limits"
    lappend LeHelp "CarEx T1JH94_STRMM intron 2"
    lappend LeHelp "CarEx T1JH94_STRMM cds ;# il y a meme le CDS"
    lappend LeHelp "CarEx T1JH94_STRMM ;# ... pour rien"
    set CarEx(HELP,,) [join $LeHelp "\n"]

    set SeqGenDir "/home/scalzitti/These/G3PO_Benchmark/Data/Fasta/0Kb"
    set CarExoDir "/home/scalzitti/These/G3PO_Benchmark/Data/Structures_map/References"
    #rR Two other directories were assigned here but never read :
    #rR   SeqGenHDir /home/scalzitti/These/Part2_IA/Data/Structure_map/R98_0919
    #rR   CarExoHDir /home/scalzitti/These/Part2_IA/Data/Sequences_genomiques/R98_0919/References/0Kb

    #rR The global lists always exist, even if a directory is empty
    set CarEx(LISTOF,sg,id) {}
    set CarEx(LISTOF,ce,id) {}
    set CarEx(LISTOF,nom,)  {}

    foreach FicSeqGen [lsort [glob -nocomplain "$SeqGenDir/*.fasta"]] {
        set Id [file rootname [file tail $FicSeqGen]]
        lappend CarEx(LISTOF,sg,id) $Id
        set CarEx($Id,sg,filename) $FicSeqGen
    }

    foreach FicCarExo [lsort [glob -nocomplain "$CarExoDir/*.gar"]] {
        set Id [file rootname [file tail $FicCarExo]]
        lappend CarEx(LISTOF,ce,id) $Id
        set CarEx($Id,ce,filename) $FicCarExo
        set Nom [bbsc $Id nom]
        lappend CarEx(LISTOF,nom,) $Nom
        lappend CarEx($Id,listof,nom) $Nom
        set NOM [string toupper $Nom]
        lappend CarEx($NOM,listof,id) $Id
        #rR Reverse is decided file by file : it is 1 when the last recognised
        #rR line of this file contains -1, and 0 when no line is recognised
        set Reverse 0
        foreach Ligne [LesLignesDuFichier $FicCarExo] {
            if { ! [regexp -nocase {^([a-z]+)_([0-9]+)[ \t]+([0-9]+)[ \t]+([0-9]+)} $Ligne Match EI I D F]} { continue }
            set Reverse [regexp {\-1} $Ligne]
            set ei [string tolower $EI]
            lappend CarEx($Id,listof,$ei) $I
            #rR start is always the smallest of the two coordinates
            set W [Maxi $D $F]
            set D [Mini $D $F]
            set F $W
            set CarEx($Id,${ei}_$I,limits) "$D $F"
            set CarEx($Id,${ei}_$I,start) $D
            set CarEx($Id,${ei}_$I,end) $F
            lappend CarEx($Id,$ei,i) $I
            lappend CarEx($Id,$ei,limits) "$D $F"
        }
        if {$Reverse} {
            foreach ei [list exon intron] {
                if { ! [info exists CarEx($Id,$ei,i)]} { continue }
                set CarEx($Id,$ei,limits) [lreverse $CarEx($Id,$ei,limits)]
                set N [llength $CarEx($Id,$ei,i)]
                foreach What [list limits start end] {
                    #rR First collect the mirrored values, then store them, so
                    #rR that no value is overwritten before it has been read
                    set LesNew {}
                    foreach I $CarEx($Id,$ei,i) {
                        lappend LesNew $CarEx($Id,${ei}_[expr {$N-$I+1}],$What)
                    }
                    foreach I $CarEx($Id,$ei,i) New $LesNew {
                        set CarEx($Id,${ei}_$I,$What) $New
                    }
                }
            }
        }
    }

    foreach Id $CarEx(LISTOF,ce,id) {
        if {[info exists CarEx($Id,listof,nom)]} { set CarEx($Id,listof,nom) [lsort -unique $CarEx($Id,listof,nom)] }
    }
    set CarEx(LISTOF,nom,) [lsort -unique $CarEx(LISTOF,nom,)]
    set CarEx(LISTOF,ce,) $CarEx(LISTOF,ce,id)
    set CarEx(LISTOF,id,) $CarEx(LISTOF,ce,id)
    return [CarEx $Qui $Quoi $Quid]
}
proc CreateMacsimXmlNuc {} {
    #rR Converts, for every Nom of ListeDesPABs, the msfNuc msfNuc3 and
    #rR msfNucProt3 files into macsimXmlNuc macsimXmlNuc3 and macsimXmlNucProt3
    #rR by running clustalw from within RepertoireDuGenome.
    #rR Returns Bravo.
    set LesTypes [list Nuc Nuc3 NucProt3]

    NousAllonsAuBoulot [RepertoireDuGenome]
    foreach Type $LesTypes {
        file mkdir macsimXml$Type
    }
    foreach Nom [ListeDesPABs] {
        Espionne $Nom
        foreach Type $LesTypes {
            set Source [GscopeFile $Nom msf$Type]
            set Cible  [GscopeFile $Nom macsimXml$Type]
            exec clustalw $Source -convert -output=macsims -outfile=$Cible -outorder=input
        }
    }
    OnRevientDuBoulot
    return Bravo
}
proc CreateNucAliFromProtAliBBSCPourTous {} {
    #rR Runs CreateNucAliFromProtAliBBSC on every Nom of ListeDesPABs.
    #rR Returns Bravo.
    set LesNoms [ListeDesPABs]
    foreach Nom $LesNoms { CreateNucAliFromProtAliBBSC $Nom }
    return Bravo
}
proc CreateNucAliFromProtAliBBSC Nom {
    #rR Builds the nucleic alignment of Nom from its protein alignment (msfProt):
    #rR each aligned residue is replaced by its codon, each gap by ---.
    #rR Writes nucalitfa, then msfNuc through clustalw, then msfNuc3 and
    #rR msfNucProt3 through MsfOnOneLine.
    #rR Returns the msfNuc file name, or "" when there is no usable msfProt.
    file mkdir "[RepertoireDuGenome]/nucalitfa"
    file mkdir "[RepertoireDuGenome]/msfNuc"
    file mkdir "[RepertoireDuGenome]/msfNuc3"
    file mkdir "[RepertoireDuGenome]/msfNucProt3"

    set FichierMsfProt [GscopeFile $Nom msfProt]
    if {[FileAbsent $FichierMsfProt]} { return "" }
    set MsfProt [ContenuDuFichier $FichierMsfProt]
    if {[regexp "^NoSequencesToAlign" $MsfProt]} { return "" }

    #rR NOTE(review): this temporary file is never deleted here - confirm
    #rR whether TmpFile files are cleaned up elsewhere
    set FichierMsfTfa [TmpFile].tfa
    exec clustalw $FichierMsfProt -convert -output=FASTA -outfile=$FichierMsfTfa

    set LeMsfTfaNuc {}
    foreach Access [LaSequenceDuTFAs $FichierMsfTfa "LaListeDesAccess"] {
        #rR The codon table belongs to one sequence only. Without this reset
        #rR the codons of a previous, longer, sequence would be used instead
        #rR of the NNN padding below.
        unset -nocomplain CodonEn

        set SeqProtTfa [QueLaSequenceDuTexteTFA [bbsc $Access prottfa content]]
        set SeqNucTfa  [QueLaSequenceDuTexteTFA [bbsc $Access nuctfa content]]
        Espionne $Nom $Access [expr {[string length $SeqProtTfa]*3}] [string length $SeqNucTfa]
        if {$SeqNucTfa==""} { Espionne "$Access est vide" ; continue }

        #rR CodonEn(i) is the i-th group of 3 bases of the ungapped nucleic sequence
        set I -1
        foreach P [split $SeqProtTfa ""] {a b c} [split $SeqNucTfa ""] {
            incr I
            set CodonEn($I) "$a$b$c"
        }

        set TfaProt [LaSequenceDuTFAs $FichierMsfTfa $Access]
        set SeqProt [QueLaSequenceDuTexteTFA $TfaProt]
        set LaSeqNucAlignee {}
        set I -1
        foreach A [split $SeqProt ""] {
            if {$A=="-" || $A=="."} { lappend LaSeqNucAlignee "---" ; continue }
            incr I
            #rR Residues without codon are written NNN
            if { ! [info exists CodonEn($I)]} { set CodonEn($I) "NNN" }
            lappend LaSeqNucAlignee [string toupper $CodonEn($I)]
        }
        lappend LeMsfTfaNuc ">$Access homolog from $Nom nucaligment from protaligment"
        lappend LeMsfTfaNuc [join $LaSeqNucAlignee ""]
    }

    set FichierNucAliTfa [SauveLesLignes $LeMsfTfaNuc dans [GscopeFile $Nom "nucalitfa"]]
    set FichierMsfNuc [GscopeFile $Nom "msfNuc"]
    exec clustalw $FichierNucAliTfa -convert -output=gscope -outfile=$FichierMsfNuc
    MsfOnOneLine $Nom msfNuc msfNuc3 "" ShowCodons
    MsfOnOneLine $Nom msfNuc msfNucProt3 "" ShowCodons
    return $FichierMsfNuc
}
proc bbsc {{Qui ""} {Quoi ""} {Quid ""}} {
    #rR Memoized access to the BBSC project data. Call it without argument
    #rR (or with Help) to get the list of queries.
    #rR Qui can be a Nom (BBSC01), an AcId (Q9DBI2_BBS10_MOUSE), an Ac (Q9DBI2),
    #rR an Id (BBS10_MOUSE), an organism (MOUSE) or the word listof.
    #rR Everything is loaded in the global array bbsc at the first call that
    #rR misses the cache. File contents are not stored, they are read at each call.
    #rR Returns "" for anything unknown.
    global bbsc
    if {[string equal -nocase $Qui "Help"] || ($Qui=="" && $Quoi=="")} {
        set LeHelp {}
        lappend LeHelp "bbsc help"
        lappend LeHelp "bbsc listof nom"
        lappend LeHelp "bbsc listof acid"
        lappend LeHelp "bbsc listof ac"
        lappend LeHelp "bbsc listof id"
        lappend LeHelp "bbsc listof human acid"
        lappend LeHelp "bbsc listof human ac"
        lappend LeHelp "bbsc listof human id"
        lappend LeHelp "bbsc listof cds count"
        lappend LeHelp "bbsc listof human acid"
        lappend LeHelp "bbsc BBSC01 aliprot"
        lappend LeHelp "bbsc BBSC01 alinuc"
        lappend LeHelp "bbsc BBSC01 msf ... or msfProt msfNuc msfNuc3 msfNucProt3 macsimXml macsimRsf"
        lappend LeHelp "bbsc BBSC01 acid"
        lappend LeHelp "bbsc BBSC01 ac"
        lappend LeHelp "bbsc BBSC01 id"
        lappend LeHelp "bbsc BBSC01 cds count"
        lappend LeHelp "bbsc BBSC01 listof org"
        lappend LeHelp "bbsc BBSC01 listof copsaliprot"
        lappend LeHelp "bbsc BBSC01 listof acid"
        lappend LeHelp "bbsc BBSC01 listof ac"
        lappend LeHelp "bbsc BBSC01 listof id"
        lappend LeHelp "bbsc BBSC01 listof cds"
        lappend LeHelp "bbsc BBSC01 listof missingcds"
        lappend LeHelp "bbsc Q9DBI2_BBS10_MOUSE nom"
        lappend LeHelp "bbsc Q9DBI2_BBS10_MOUSE acid"
        lappend LeHelp "bbsc Q9DBI2_BBS10_MOUSE ac"
        lappend LeHelp "bbsc Q9DBI2_BBS10_MOUSE id"
        lappend LeHelp "bbsc Q9DBI2_BBS10_MOUSE nuctfa filename"
        lappend LeHelp "bbsc Q9DBI2_BBS10_MOUSE nuctfa content"
        lappend LeHelp "bbsc BBS10_MOUSE nom"
        lappend LeHelp "bbsc BBS10_MOUSE acid"
        lappend LeHelp "bbsc BBS10_MOUSE ac"
        lappend LeHelp "bbsc BBS10_MOUSE id"
        lappend LeHelp "bbsc BBS10_MOUSE nuctfa filename"
        lappend LeHelp "bbsc BBS10_MOUSE nuctfa content"
        lappend LeHelp "bbsc Q9DBI2 nom"
        lappend LeHelp "bbsc Q9DBI2 acid"
        lappend LeHelp "bbsc Q9DBI2 ac"
        lappend LeHelp "bbsc Q9DBI2 id"
        lappend LeHelp "bbsc Q9DBI2 nuctfa filename"
        lappend LeHelp "bbsc Q9DBI2 nuctfa content"
        lappend LeHelp "bbsc MOUSE listof acid"
        lappend LeHelp "bbsc listof content"
        return [join $LeHelp "\n"]
    }

    set LesContent [list aliprot alinuc nuctfa prottfa msf msfProt msfNuc msfNuc3 msfNucProt3 macsimXml macsimRsf]
    set bbsc(listof,content,) $LesContent
    if {[info exists bbsc($Qui,$Quoi,$Quid)]} { return $bbsc($Qui,$Quoi,$Quid) }

    if {[info exists bbsc("EstCharge")]} {
        #rR Asking for a content type without Quid means asking for its content
        if {$Quid=="" && [lsearch $LesContent $Quoi]>=0 } { set Quid "content" }
        #rR Qui is an alias (Nom, Ac or Id) of an AcId : answer through the AcId
        if {[info exists bbsc($Qui,acid,)] && $bbsc($Qui,acid,)!=$Qui} {
            set AcId $bbsc($Qui,acid,)
            if {[info exists bbsc($AcId,$Quoi,$Quid)]} {
                return [bbsc $AcId $Quoi $Quid]
            }
            if {$Quid=="content" && [info exists bbsc($AcId,nom,)] && [info exists bbsc($bbsc($AcId,nom,),$Quoi,filename)]} {
                return [ContenuDuFichier $bbsc($bbsc($AcId,nom,),$Quoi,filename)]
            }
        }
        #rR Qui belongs to a Nom : answer through the Nom
        if {[info exists bbsc($Qui,nom,)]} {
            if {[info exists bbsc($bbsc($Qui,nom,),$Quoi,$Quid)]} {
                return [bbsc $bbsc($Qui,nom,) $Quoi $Quid]
            }
            if {$Quid=="content" && [info exists bbsc($bbsc($Qui,nom,),$Quoi,filename)]} {
                return [bbsc $bbsc($Qui,nom,) $Quoi $Quid]
            }
        }
        if {$Quid=="content" && [info exists bbsc($Qui,$Quoi,filename)]} {
            return [ContenuDuFichier $bbsc($Qui,$Quoi,filename)]
        }
        return ""
    }

    set bbsc("EstCharge") 1
    set RG [RepertoireDuGenome]
    foreach Nom [ListeDesPABs] {
        lappend bbsc(listof,nom,) $Nom
        #rR Every content type, aliprot and alinuc included, is in RG/type/Nom
        foreach Content $LesContent {
            set bbsc($Nom,$Content,filename) "$RG/$Content/$Nom"
        }
        set FicAliprot $bbsc($Nom,aliprot,filename)
        set LesCopAli [LaSequenceDuTFAs $FicAliprot "LaListeDesAccess"]
        set bbsc($Nom,listof,copsaliprot) $LesCopAli
        set bbsc($Nom,acid,) [lindex $LesCopAli 0]
        set RepCopsNuctfa "$RG/copsnuctfa/$Nom"
        set RepCopsProttfa "$RG/copsprottfa/$Nom"
        set bbsc($Nom,listof,cds) {}
        set bbsc($Nom,listof,missingcds) {}
        set AcIdHuman ""
        foreach AcId $LesCopAli {
            #rR An AcId is read as Ac_Id where Id is itself name_organism
            lassign [split $AcId "_"] A I O
            set Ac $A
            set Id ${I}_$O
            lappend bbsc($Nom,listof,acid) $AcId
            lappend bbsc($Nom,listof,ac) $Ac
            lappend bbsc($Nom,listof,id) $Id
            #rR The first sequence of the alignment is recorded as the human one
            if {$AcIdHuman==""} {
                set AcIdHuman $AcId
                set IdHuman $Id
                set AcHuman $Ac
                set bbsc($Nom,acid,) $AcIdHuman
                set bbsc($Nom,ac,) $AcHuman
                set bbsc($Nom,id,) $IdHuman
                lappend bbsc(listof,human,acid) $AcIdHuman
                lappend bbsc(listof,human,ac) $AcHuman
                lappend bbsc(listof,human,id) $IdHuman
            }
            lappend bbsc($Nom,listof,org) $O
            lappend bbsc(listof,org,) $O
            lappend bbsc($O,listof,nom) $Nom
            lappend bbsc($O,listof,acid) $AcId
            lappend bbsc($AcId,nom,) $Nom
            lappend bbsc($AcId,acid,) $AcId
            lappend bbsc($AcId,ac,) $Ac
            lappend bbsc($AcId,id,) $Id
            lappend bbsc($Ac,acid,) $AcId
            lappend bbsc($Id,acid,) $AcId
            lappend bbsc(listof,acid,) $AcId
            lappend bbsc(listof,ac,) $Ac
            lappend bbsc(listof,id,) $Id
            set FicCopProttfa "$RepCopsProttfa/$AcId"
            set bbsc($AcId,prottfa,filename) $FicCopProttfa
            set FicCopNuctfa "$RepCopsNuctfa/$AcId"
            if {[file exists $FicCopNuctfa]} {
                lappend bbsc($Nom,listof,cds) $AcId
                set bbsc($AcId,nuctfa,filename) $FicCopNuctfa
            } else {
                lappend bbsc($Nom,listof,missingcds) $AcId
            }
        }
        set P [llength $bbsc($Nom,listof,cds)]
        set A [llength $bbsc($Nom,listof,missingcds)]
        set T [expr {$A+$P}]
        #rR An alignment without any sequence gives 0% instead of a division by zero
        set PC [expr {$T > 0 ? (100*$A)/$T : 0}]
        set bbsc($Nom,cds,count) [format "%4d %s %3d %s %3d %s %3d%s %s" $T cops $P cds $A missingcds $PC % missing]
        lappend bbsc(listof,cds,count) "$Nom $bbsc($Nom,cds,count)"
    }
    return [bbsc $Qui $Quoi $Quid]
}
proc CdsFromRefseq Refseq {
    #rR Fetches the GenBank entry of Refseq and returns its coding sequence,
    #rR cut out of the nucleotide sequence with the limits of the first CDS line.
    #rR Only limits made of digits, dots and commas are accepted, the
    #rR characters < and > being removed first.
    #rR Returns "" after a FaireLire message when no CDS line is found or when
    #rR the limits are not accepted.
    JeMeSignale
    set GB [eFetchREST nucleotide $Refseq]
    DecortiqueGenBank OS OC SeqADN $GB

    if {[regexp {\n +CDS +([^\n]+)\n} $GB Match Limits]} {
        regsub -all {[<>]} $Limits "" Limits
        if { ! [regexp {[^0-9\,\.]} $Limits]} {
            set LesMorceaux {}
            foreach Limit [split $Limits ","] {
                #rR D..F splits into D, an empty field, and F
                lassign [split $Limit "."] D Bidon F
                lappend LesMorceaux [string range $SeqADN $D-1 $F-1]
            }
            return [join $LesMorceaux ""]
        }
        Espionne $GB
        Espionne $Limits
        FaireLire "bad limits $Limits !!!!!"
        return ""
    }

    Espionne $GB
    FaireLire "pas de cds pour $Refseq !!!!!"
    return ""
}
proc MrnaDesCopainsPourTous {{Liste ""}} {
    #rR For each Nom of Liste (default: ListeDesPABs) and each protein found in
    #rR copsprottfa/Nom which has no file yet in copsnuctfa/Nom :
    #rR   - collects candidate CDS from REFSEQ_NT (IDMapping + CdsFromRefseq)
    #rR     and from NicoMapping + CdsFromDec2016,
    #rR   - translates each candidate and counts the mismatches with the protein,
    #rR   - saves the candidate with the fewest mismatches, unless more than 9%
    #rR     of the protein positions mismatch.
    #rR Returns the list of "Nom Copain HeaderOfTheSavedCandidate".
    set LaSortie {}
    set CopsNucTfaDir  "[RepertoireDuGenome]/copsnuctfa"
    set CopsProtTfaDir "[RepertoireDuGenome]/copsprottfa"
    file mkdir $CopsNucTfaDir
    if {$Liste==""} { set Liste [ListeDesPABs] }
    foreach Nom $Liste {
        set MesNucTfa "$CopsNucTfaDir/$Nom"
        file mkdir $MesNucTfa
        foreach FicProtTfa [lsort [glob -nocomplain "$CopsProtTfaDir/$Nom/*"]] {
            set Entete [EnteteDuFichierTFA $FicProtTfa]
            regsub "^>" $Entete "" Entete
            scan $Entete "%s" Copain
            lassign [split $Copain "_"] A B O
            set AC $A
            set ID ${B}_$O
            set FicNucTfa "$MesNucTfa/$Copain"
            if {[file exists $FicNucTfa]} { continue }
            Espionne ya pas $FicNucTfa

            #rR The candidates belong to this copain only : start from an empty
            #rR list, otherwise the candidates of the previous copains would be
            #rR compared to this protein too
            set LesTfa {}
            unset -nocomplain IsoformDuMismatch

            set Refseqs [IDMapping ACC+ID REFSEQ_NT $AC]
            foreach LesRefseq $Refseqs {
                foreach Refseq $LesRefseq {
                    if {$Refseq!=""} {
                        set Cds [CdsFromRefseq $Refseq]
                        set Entete "$Refseq CDS from REFSEQ_NT $Refseq from ACC $AC from $Nom"
                        lappend LesTfa [SequenceFormatTFA $Cds $Entete "nucbrut"]
                    }
                }
            }

            #rR The mapping of Nicolas is used too : first with the AC, then with the ID
            set IdT [NicoMapping $AC IdT]
            if {$IdT==""} { set IdT [NicoMapping $ID IdT] }
            if {$IdT!=""} {
                set Cds [CdsFromDec2016 $IdT]
                set Entete "$IdT CDS from dec2016.ensembl $IdT from ACC $AC from $Nom"
                lappend LesTfa [SequenceFormatTFA $Cds $Entete "nucbrut"]
            }
            Espionne $AC $ID =$IdT=

            if {$LesTfa eq ""} { continue }

            set Pro [QueLaSequenceDuFichierTFA $FicProtTfa]
            set Max [string length $Pro]
            #rR An empty protein cannot be scored
            if {$Max == 0} { continue }

            set LesMismatch {}
            foreach Tfa $LesTfa {
                set Nuc [QueLaSequenceDuTexteTFA $Tfa]
                #rR Only the part covering the protein is translated
                set Nuc [string range $Nuc 0 [expr {$Max*3-1}]]
                set NucTraduit [SeqNucToSeqPro $Nuc]
                set N 0
                foreach P [split $Pro ""] T [split $NucTraduit ""] {
                    if {$P==$T} { incr N }
                }
                set Mismatch [expr {$Max - $N}]
                set IsoformDuMismatch($Mismatch) $Tfa
                lappend LesMismatch $Mismatch
            }
            #rR Numeric sort : as text 10 would come before 9
            set Mismatch [lindex [lsort -integer $LesMismatch] 0]
            set Tfa $IsoformDuMismatch($Mismatch)
            set Seq [QueLaSequenceDuTexteTFA $Tfa]
            set EnteteIsoform [EnteteDuTexteTFA $Tfa]
            set Score [expr {(100.*$Mismatch)/$Max}]
            #rR NOTE(review): Entete is here the header of the last candidate
            #rR built above, not the header of the protein file - confirm that
            #rR this is what is wanted in the saved header
            regsub "^>" $EnteteIsoform "$Entete " EnteteMrna
            append EnteteMrna " $Mismatch/$Max mismatch [format {%5.2f} $Score]%"
            if {$Score>9} { continue }
            Espionne Youpiiiiiiiii $EnteteMrna $FicNucTfa
            Sauve [SequenceFormatTFA $Seq $EnteteMrna nucbrut] dans $FicNucTfa
            lappend LaSortie "$Nom $Copain $EnteteIsoform"
        }
    }
    return $LaSortie
}
proc MrnaFromPourTous {} {
    #rR For every Nom of ListeDesPABs :
    #rR   - if the nuctfa file exists, checks it against the prottfa file
    #rR     (length after removal of a final stop codon, then translation) and
    #rR     reports the differences through FaireLire,
    #rR   - otherwise saves the sequence given by MrnaFrom in the nuctfa file.
    #rR Returns the list of the Nom for which a nuctfa file has been written.
    set LaSortie {}
    file mkdir "[RepertoireDuGenome]/nuctfa"
    foreach Nom [ListeDesPABs] {
        set FicNucTfa [GscopeFile $Nom "nuctfa"]
        set FicProtTfa [GscopeFile $Nom "prottfa"]
        if {[file exists $FicNucTfa]} {
            set Nuc [QueLaSequenceDuTFA $FicNucTfa]
            regsub -nocase {(TAA|TAG|TGA)$} $Nuc "" Nuc
            set Pro [QueLaSequenceDuTFA $FicProtTfa]
            if {[string length $Nuc]!=[string length $Pro]*3} {
                FaireLire "$Nom [string length $Nuc]!=[string length $Pro]*3"
            }
            #rR A nucleic sequence which is too long is cut to the protein length
            set Nuc [string range $Nuc 0 [expr {[string length $Pro]*3-1}]]
            if {[string length $Nuc]==[string length $Pro]*3} {
                set NucTraduit [SeqNucToSeqPro $Nuc]
                if {$Pro==$NucTraduit} {
                    #FaireLire "j'ai coupe c'est bon"
                } else {
                    set N 0
                    foreach P [split $Pro ""] T [split $NucTraduit ""] {
                        if {$P!=$T} { incr N }
                    }
                    FaireLire "$N ratés pour $Nom\n$Pro\n$NucTraduit"
                }
            }
            continue
        }
        set LesTfa [MrnasFrom $Nom]
        if {$LesTfa=={}} { Espionne "je ne rouve rien du tout pour $Nom" ; continue }
        set Seq [MrnaFrom $Nom]
        set Seq [string toupper $Seq]
        lappend LaSortie $Nom
        set AC [ExtraitInfo $Nom "AC:"]
        #rR The ID comes from the ID: field (it was read from AC: by mistake)
        set ID [ExtraitInfo $Nom "ID:"]
        Sauve [SequenceFormatTFA $Seq "$Nom ${AC}_$ID" nucbrut] dans [GscopeFile $Nom "nuctfa"]
    }
    return $LaSortie
}
proc CdsFromDec2016 ENST {
    #rR Returns what ContenuDuFichier reads at the dec2016 Ensembl REST address
    #rR of the cds of ENST, asked as plain text.
    set Url "https://dec2016.rest.ensembl.org/sequence/id/$ENST?type=cds&content-type=text/plain"
    return [ContenuDuFichier $Url]
}
proc MrnasFrom {Qui {FirstSeqOnly ""}} {
    #rR Returns the list of the TFA of all the mRNA (cds) of Qui, found through
    #rR ACC -> CCDS -> ENSEMBL_TRS -> Ensembl REST.
    #rR Qui is an access, or a Nom of the project (its AC: is then used).
    #rR With FirstSeqOnly the first non empty sequence is returned, as a raw
    #rR sequence and not as a TFA.
    #rR Returns "" when no CCDS is found.
    set FirstSeqOnly [string equal -nocase "FirstSeqOnly" $FirstSeqOnly]
    set LesMrnas {}
    set Nom $Qui
    if {[EstUnPAB $Qui]} { set Qui [ExtraitInfo $Qui "AC:"] }
    #rR A stray "return Seq" stood here : it read a variable which was not yet
    #rR set and made everything below unreachable
    set CCDSs [lindex [IDMapping ACC+ID CCDS $Qui] 0]
    if {$CCDSs=={}} {
        #rR The access may need an extension -1 -2 ... (0 stands for no extension)
        foreach I [NombresEntre 0 9] {
            set Ext "-$I"
            if {$I==0} { set Ext "" }
            set CCDSs [lindex [IDMapping ACC+ID CCDS $Qui$Ext] 0]
            if {$CCDSs!={}} { break }
        }
        if {$CCDSs=={}} { return "" }
    }
    foreach CCDS $CCDSs {
        set ENSTs [lindex [IDMapping CCDS ENSEMBL_TRS $CCDS] 0]
        if {$ENSTs=={}} { continue }
        foreach ENST $ENSTs {
            set Seq [ContenuDuFichier "https://rest.ensembl.org/sequence/id/$ENST?type=cds&content-type=text/plain"]
            if {$Seq!=""} {
                if {$FirstSeqOnly} { return $Seq }
                set Entete "$ENST mRNA from CCDS $CCDS from ACC $Qui from $Nom"
                lappend LesMrnas [SequenceFormatTFA $Seq $Entete "nucbrut"]
            }
        }
    }
    return $LesMrnas
}
#rR voir plus bas BlastAli BlastIndel BlastStat et Blastomics
proc DGB {} {
    #rR Test of DecortiqueGenBank : fetches the protein np_001297087, then
    #rR shows through Espionne the DbXrefs and the Seq it gives back.
    #rR Returns DGB.
    set Xrefs ""
    set Reponse [eFetchREST protein "np_001297087"]
    set Texte [lindex $Reponse 0]
    DecortiqueGenBank OS OC Seq $Texte 0 0 "" "" DbXrefs
    foreach Var [list DbXrefs Seq] {
        Espionne [set $Var]
    }
    return DGB
}
proc AliIndel {{Qui ""} {Quoi ""} {FicAli ""}} {
    #rR Memoized access to the sequences of the alignment file FicAli.
    #rR   AliIndel "" "" FicAli    declares the file to use
    #rR   AliIndel ListOf AC       the accesses, without their first xxx_ prefix
    #rR   AliIndel AC LE|SS|OS|DE  length, text read from the file, and the
    #rR                            placeholders OS and DE
    #rR   AliIndel Query LE|SQ|GQ  the same for the first sequence of the file
    #rR                            (SQ keeps only the capital letters of GQ)
    #rR Everything is reloaded as soon as FicAli differs from the loaded file.
    #rR Returns "" for anything unknown, and when no file has been declared yet.
    global AliIndel
    if {$FicAli!=""} {
        #rR The loaded file is stored in AliIndel(FicAli,) : the test has to
        #rR use this very key, otherwise a new file never triggers the reload
        if {[info exists AliIndel(FicAli,)] && $AliIndel(FicAli,) ne $FicAli} {
            unset AliIndel
        }
        set AliIndel("FicAliMemo") $FicAli
    }
    if {[info exists AliIndel($Qui,$Quoi)]} { return $AliIndel($Qui,$Quoi) }
    if {[info exists AliIndel("EstCharge")]} { return "" }
    #rR Nothing to load yet. EstCharge is not set, so that a later call which
    #rR gives the file can still load it
    if { ! [info exists AliIndel("FicAliMemo")]} { return "" }

    set AliIndel("EstCharge") 1
    set AliIndel(FicAli,) $AliIndel("FicAliMemo")
    set Query ""
    foreach Access [LaSequenceDuTFAs $AliIndel(FicAli,) "LaListeDesAccess"] {
        #rR NOTE(review): elsewhere in this file the result of LaSequenceDuTFAs
        #rR goes through QueLaSequenceDuTexteTFA before it is used as a
        #rR sequence. Here it is used as it is - confirm
        set SeqGap [LaSequenceDuTFAs $AliIndel(FicAli,) $Access]
        regsub {^[^_]+_} $Access "" AC
        regsub -all {[^A-Z]} $SeqGap "" Seq
        set LE [string length $SeqGap]
        lappend AliIndel(ListOf,AC) $AC
        set AliIndel($AC,LE) $LE
        set AliIndel($AC,SS) $SeqGap
        set AliIndel($AC,OS) "OS"; #[GeneQuid UniprotData $Access OS]
        set AliIndel($AC,DE) "DE"; #[GeneQuid UniprotData $Access DE]
        #rR Only the first sequence is the query. Query was never set before,
        #rR so the last sequence ended up as the query
        if {$Query==""} {
            set Query $AC
            set AliIndel(Query,LE) $LE
            set AliIndel(Query,SQ) $Seq
            set AliIndel(Query,GQ) $SeqGap
        }
    }
    return [AliIndel $Qui $Quoi]
}
proc AliStatPourTous {} {
    #rR Runs AliStat on every Nom of ListeDesPABs.
    #rR Returns the list of what AliStat returned for each of them.
    set LaSortie {}
    foreach Nom [ListeDesPABs] { lappend LaSortie [AliStat $Nom] }
    return $LaSortie
}
proc AliStat {{FichierAli ""} {CouOut ""} {PilOut ""}} {
    #rR Same thing as BlastStat, but for an alignment file read through AliIndel.
    #rR FichierAli is an alignment file or a Nom of the project (its aliprot is
    #rR then used and the results go to Couvert and Piliers of the project).
    #rR Writes two csv files :
    #rR   CouOut  one line per sequence   Access;LE;LZ;PC;PN;LesZones;OS;DE
    #rR           LZ is the length of the last zone, PC the percentage of LE
    #rR           covered by all the zones, PN is always 0.0 here
    #rR   PilOut  one line per position   Position;NumberOfSequencesPresent
    #rR A zone is a run of capital letters of the SS text of a sequence.
    #rR Returns the list of the two file names.
    if {[EstUnPAB $FichierAli]} {
        set Nom $FichierAli
        set FichierAli [GscopeFile $Nom "aliprot"]
        set RepCouvert "[RepertoireDuGenome]/Couvert"
        set RepPiliers "[RepertoireDuGenome]/Piliers"
        file mkdir $RepCouvert
        file mkdir $RepPiliers
        set AC [ExtraitInfo $Nom "AC:"]
        set ID [ExtraitInfo $Nom "ID:"]
        set ACID "${AC}_$ID"
        if {$CouOut==""} { set CouOut "$RepCouvert/${Nom}-$ACID.csv" }
        if {$PilOut==""} { set PilOut "$RepPiliers/${Nom}-$ACID.csv" }
    } else {
        if {$CouOut==""} { set CouOut $FichierAli.Couvert.csv }
        if {$PilOut==""} { set PilOut $FichierAli.Piliers.csv }
    }

    AliIndel "" "" $FichierAli
    set QL [AliIndel Query LE]
    foreach I [NombresEntre 1 $QL] {
        set TaillePilier($I) 0
    }

    set PN 0.0
    set LesCouvert {}
    foreach Access [AliIndel ListOf AC $FichierAli] {
        set LE [AliIndel $Access LE]
        set SS [AliIndel $Access SS]
        set DE [AliIndel $Access DE] ; regsub -all ";" $DE "," DE
        set OS [AliIndel $Access OS]
        set LesZones [regexp -all -indices -inline {[A-Z]+} $SS]
        set Couverture 0
        #rR LZ stays 0 for a sequence without any zone
        set LZ 0
        foreach Zone $LesZones {
            lassign $Zone D F
            set LZ [expr {$F-$D+1}]
            incr Couverture $LZ
            #rR regexp -indices counts from 0 while the piliers are numbered
            #rR from 1 : shift by one to fall on the right pilier
            foreach I [NombresEntre [expr {$D+1}] [expr {$F+1}]] {
                incr TaillePilier($I)
            }
        }
        #rR NOTE(review): the line reports LZ, the last zone only, next to a
        #rR PC computed on all the zones - confirm that this is wanted
        set PC [expr {$LE > 0 ? round(100.*$Couverture/$LE) : 0}]
        set LaLigne [list $Access $LE $LZ $PC $PN $LesZones $OS $DE]
        lappend LesCouvert [join $LaLigne ";"]
    }

    #rR The histogram which was computed here, and never shown, divided by the
    #rR highest pilier and failed when all piliers were empty : it is removed
    set LesPiliers {}
    foreach I [NombresEntre 1 $QL] {
        lappend LesPiliers "$I;$TaillePilier($I)"
    }

    if {$CouOut!=""} { SauveLesLignes $LesCouvert dans $CouOut }
    if {$PilOut!=""} { SauveLesLignes $LesPiliers dans $PilOut }
    return [list $CouOut $PilOut]
}
proc InformeBBSCPourTous {} {
    #rR For every Nom of ListeDesPABs : reads the access of the prottfa header
    #rR (second word, expected as Access_Name_HUMAN), asks GeneQuid for the
    #rR AC ID GN and DE lines of this access and stores AC, ID, GN, DE, ValiGN
    #rR and ValiDE in the infos of Nom, plus BetterID when the ID of the answer
    #rR differs from the one of the header.
    #rR Inconsistencies are shown through FaireLire. Returns bravo.
    file mkdir "[RepertoireDuGenome]/infos"
    foreach Nom [ListeDesPABs] {
        set TFA [ContenuDuFichier [GscopeFile $Nom prottfa]]
        scan $TFA "%s %s" N AccessId
        lassign [split $AccessId "_"] Access I H
        if {$H ne "HUMAN"} { FaireLire "$AccessId pas bon" }

        set ClesEmbl [GeneQuid UniprotData $Access "+AC,+ID,+GN,+DE"]
        foreach Cle [list AC ID GN DE] { set $Cle "" }
        foreach Ligne [split $ClesEmbl "\n"] {
            if {[regexp {^([A-Z][A-Z]) (.*)$} $Ligne Match K V]} {
                #rR Only the first line of each key is kept
                if {[set $K] eq ""} { set $K $V }
            } else {
                FaireLire $Ligne
            }
        }
        #rR AC is cut at its first ; and ID at its first blank
        regsub {;.*$} $AC "" AC
        regsub { .*$} $ID "" ID
        if {$Access!=$AC} {
            FaireLire "$Access\n$ClesEmbl"
        }

        set OriId ${I}_$H
        set BetterId ""
        if {$ID ne $OriId} {
            set BetterId $ID
        }
        Espionne $AC $Access
        Espionne $ID $OriId

        #rR GN and DE are reduced to their Name= and Full= values when present
        regexp {Name=([^; ]+)[; ]} $GN Match GN
        regexp {Full=([^;]+);} $DE Match DE
        Espionne $GN
        Espionne $DE

        InformeSansDemander $Nom "AC: $AC\nID: $OriId\nGN: $GN\nDE: $DE\nValiGN: $GN\nValiDE: $DE"
        if {$BetterId ne ""} { InformeSansDemander $Nom "BetterID: $ID" }
    }
    return bravo
}
proc CreateMsfAndMacsimFromTfaPourTous {} {
    #rR Creates the msf, macsimXml and macsimRsf directories, then converts the
    #rR aliprot file of every Nom of ListeDesPABs into its msf file with clustalw.
    #rR The macsim and RSF conversions are switched off.
    foreach Rep [list msf macsimXml macsimRsf] {
        file mkdir "[RepertoireDuGenome]/$Rep"
    }
    foreach Nom [ListeDesPABs] {
        Espionne $Nom
        set FicAli [GscopeFile $Nom aliprot]
        set FicMsf [GscopeFile $Nom msf]
        set FicXml [GscopeFile $Nom macsimXml]
        set FicRsf [GscopeFile $Nom macsimRsf]
        exec clustalw $FicAli -convert -output=gscope -outfile=$FicMsf
        #exec clustalw $FicAli -convert -output=macsim -outfile=$FicXml
        #exec clustalw $FicAli -convert -output=RSF -outfile=$FicRsf
    }
}
proc CreateProjectBBSC {} {
    #rR Builds the BBSC project in /genomics/link/BBSC from the alignments
    #rR DeKirsley/*.tfa, taken in sorted order and named BBSC01, BBSC02, ...
    #rR For each of them :
    #rR   aliprot/Nom                  copy of the alignment
    #rR   prottfa/Nom                  first sequence, without gaps
    #rR   copsprottfa/Nom/Access       every sequence, without gaps
    #rR   copsprottfagap/Nom/Access    every sequence, with its gaps
    #rR Then calls CreeBornesDesPABsTroisGradins. Returns CreateProjectBBSC.
    set Rep "/genomics/link/BBSC"
    NousAllonsAuBoulot $Rep
    foreach Dir [list aliprot fiches prottfa copsprottfagap copsprottfa] {
        file mkdir $Dir
    }

    set PF "BBSC"
    set I 0
    foreach FicAliprot [lsort [glob -nocomplain "DeKirsley/*.tfa"]] {
        incr I
        set Nom [format "%s%2.2d" $PF $I]
        set FicAli "aliprot/$Nom"
        file copy -force $FicAliprot $FicAli

        set Human ""
        foreach Access [LaSequenceDuTFAs $FicAli "LaListeDesAccess"] {
            Espionne $Access
            set SeqGap [QueLaSequenceDuTFAs $FicAli $Access]
            #rR The gap characters are - . and ~
            set Seq [regsub -all {[-\.~]} $SeqGap ""]
            #rR The first sequence gives the prottfa of Nom
            if {$Human eq ""} {
                set Human $Access
                Sauve [SequenceFormatTFA $Seq "$Nom $Human" protbrut] dans "prottfa/$Nom"
            }
            file mkdir copsprottfa/$Nom
            file mkdir copsprottfagap/$Nom
            set Entete "$Access homolog to $Human $Nom"
            set TFA    [SequenceFormatTFA $Seq    $Entete protbrut]
            set TFAGap [SequenceFormatTFA $SeqGap $Entete protbrut]
            Sauve $TFA dans "copsprottfa/$Nom/$Access"
            Sauve $TFAGap dans "copsprottfagap/$Nom/$Access"
        }
    }

    CreeBornesDesPABsTroisGradins 1 $I $PF "" "" "2.2d" "."
    OnRevientDuBoulot
    return CreateProjectBBSC
}
#rR ici on a ce qu'il faut pour traiter les blast de Claudine ... BlastAli BlastIndel BlastStat
proc BlastAliStatPourTous {} {
    #rR Runs BlastStat on every Nom of ListeDesPABs (BlastAli is switched off).
    #rR Returns the list of what BlastStat returned for each of them.
    set LaSortie {}
    foreach Nom [ListeDesPABs] {
        #lappend LaSortie [BlastAli $Nom]
        set Stat [BlastStat $Nom]
        lappend LaSortie $Stat
    }
    return $LaSortie
}
proc OrganiseLesToposDeClaudine {} {
    #rR Copies, for every Nom of ListeDesPABs, the file ID_Arch/ID_Arch.blast
    #rR found in RepertoireDuGenome to the blastOri file of Nom, ID being the
    #rR ID: info of Nom. The blastOri, tfaFromBOri and memo directories are
    #rR created first.
    #rR Returns the list of the blastOri files.
    set LaSortie {}

    #rR Switched off : it stored ID, Alias, AC and OX in the infos from the protembl files
    if {0} {
        foreach Nom [ListeDesPABs] {
            set Embl [ContenuDuFichier [GscopeFile $Nom protembl]]
            if { ! [regexp {ID ([^ ]+)( |$)} $Embl Match ID]} { FaireLire "pas de ID dans $Nom" }
            if { ! [regexp {\nAC ([^ ;]+)(;| |$)} $Embl Match AC]} { FaireLire "pas de AC dans $Nom" }
            if { ! [regexp {\nOX ([^ ;]+)(;| |$)} $Embl Match OX]} { FaireLire "pas de OX dans $Nom" }
            regsub -all {[^0-9]} $OX "" OX
            InformeSansDemander $Nom "=ID: $ID"
            InformeSansDemander $Nom "=Alias: $ID"
            InformeSansDemander $Nom "=AC: $AC"
            InformeSansDemander $Nom "=OX: $OX"
            lappend LaSortie "$Nom $ID $AC $OX"
        }
        return $LaSortie
    }

    foreach Rep [list blastOri tfaFromBOri memo] {
        file mkdir "[RepertoireDuGenome]/$Rep"
    }

    NousAllonsAuBoulot [RepertoireDuGenome]
    foreach Nom [ListeDesPABs] {
        set ID [ExtraitInfo $Nom "ID:"]
        set Source "${ID}_Arch/${ID}_Arch.blast"
        Espionne $Source
        set Cible [GscopeFile $Nom "blastOri"]
        file copy -force $Source $Cible
        lappend LaSortie $Cible
    }
    OnRevientDuBoulot
    return $LaSortie
}
proc LesOsDesAcDeClaudine_LigneDuQY {QY LeOs} {
    #rR Private helper of LesOsDesAcDeClaudine : returns the line "QY Os" where
    #rR Os is the join of LeOs or, when it is empty, what GeneQuid knows about
    #rR QY cut at its first underscore.
    set OsCourant [join $LeOs " "]
    if {$OsCourant eq ""} {
        regsub {_.+$} $QY "" Mieux
        set OS [join [GeneQuid UniprotData $Mieux OS] " "]
        if {$OS ne ""} {
            set OsCourant $OS
        } else {
            Espionne "Rien pour $QY"
        }
    }
    return "$QY $OsCourant"
}

proc LesOsDesAcDeClaudine {} {
    #rR Reads the accesses of LesAccessAvecCaca.txt (first > and last _xxx
    #rR removed), asks GeneQuid for their QY and OS lines and writes one line
    #rR "QY Os" per QY in QyAc.txt.
    #rR Returns what SauveLesLignes returns for this file.
    set LesBonsAccess {}
    foreach Ligne [LesLignesDuFichier "/genomics/link/Topos/TOPO6B/LesAccessAvecCaca.txt"] {
        regsub {>} $Ligne "" Ligne
        regsub {_[^_]+$} $Ligne "" Ligne
        lappend LesBonsAccess $Ligne
    }
    set Retour [GeneQuid UniprotData [join $LesBonsAccess ","] "+QY,+OS"]

    set LeOsCourant {}
    set OldQY ""
    set LaSortie {}
    foreach Ligne [split $Retour "\n"] {
        if {[regexp {QY (.*)$} $Ligne Match Access]} {
            #rR A new QY closes the previous one. The fallback lookup is done
            #rR for the QY being written, not for the one which just started
            if {$OldQY!=""} {
                lappend LaSortie [LesOsDesAcDeClaudine_LigneDuQY $OldQY $LeOsCourant]
                set LeOsCourant {}
            }
            set OldQY $Access
        }
        if {[regexp {OS (.*)$} $Ligne Match OS]} {
            lappend LeOsCourant [string trim $OS]
        }
    }
    #rR The last QY is closed by the end of the answer : it was never written before
    if {$OldQY!=""} {
        lappend LaSortie [LesOsDesAcDeClaudine_LigneDuQY $OldQY $LeOsCourant]
    }

    set Fichier [SauveLesLignes $LaSortie dans "/genomics/link/Topos/TOPO6B/QyAc.txt"]
    return $Fichier
}
proc BlastStat {{FichierBlast ""} {CouOut ""} {PilOut ""}} {
    #rR Statistics on a blast file read through FromBlastIndel.
    #rR FichierBlast is a blast file or a Nom of the project (its blastOri is
    #rR then used and the results go to Couvert and Piliers of the project).
    #rR Writes two csv files :
    #rR   CouOut  one line per subject    Access;LE;LZ;PC;PN;LesZones;OS;DE
    #rR           LZ is the length of the last zone, PC the percentage of LE
    #rR           covered by all the zones
    #rR   PilOut  one line per position of the query   Position;NumberOfHits
    #rR A zone is a run of capital letters of the SS text of a subject.
    #rR A histogram of the piliers is also shown through Espionne.
    #rR Returns the list of the two file names.
    if {[EstUnPAB $FichierBlast]} {
        set Nom $FichierBlast
        set FichierBlast [GscopeFile $Nom "blastOri"]
        set RepCouvert "[RepertoireDuGenome]/Couvert"
        set RepPiliers "[RepertoireDuGenome]/Piliers"
        file mkdir $RepCouvert
        file mkdir $RepPiliers
        set Alias [ExtraitInfo $Nom "Alias" ]
        if {$CouOut==""} { set CouOut "$RepCouvert/${Nom}-$Alias.csv" }
        if {$PilOut==""} { set PilOut "$RepPiliers/${Nom}-$Alias.csv" }
    } else {
        if {$CouOut==""} { set CouOut $FichierBlast.Couvert.csv }
        if {$PilOut==""} { set PilOut $FichierBlast.Piliers.csv }
    }

    FromBlastIndel "" "" $FichierBlast
    set QL [FromBlastIndel Query LE]
    foreach I [NombresEntre 1 $QL] {
        set TaillePilier($I) 0
    }

    set LesCouvert {}
    foreach Access [FromBlastIndel ListOf AC] {
        set LE [FromBlastIndel $Access LE]
        set SS [FromBlastIndel $Access SS]
        set PN [FromBlastIndel $Access PN]
        set DE [FromBlastIndel $Access DE] ; regsub -all ";" $DE "," DE
        set OS [FromBlastIndel $Access OS]
        set LH [FromBlastIndel $Access LH]
        set LesZones [regexp -all -indices -inline {[A-Z]+} $SS]
        set Couverture 0
        #rR LZ stays 0 for a subject without any zone
        set LZ 0
        foreach Zone $LesZones {
            lassign $Zone D F
            set LZ [expr {$F-$D+1}]
            incr Couverture $LZ
        }
        #rR The percentage is computed on all the zones, as in AliStat. It was
        #rR computed on the last zone only, Couverture being never used
        set PC [expr {$LE > 0 ? round(100.*$Couverture/$LE) : 0}]
        set LaLigne [list $Access $LE $LZ $PC $PN $LesZones $OS $DE]
        #rR Every hit adds one to each position of the query it covers
        foreach H $LH {
            lassign $H AccessLu nCops nCopsDansCetteSequence OS Score Expect DebutQuery FinQuery DebutSbjct FinSbjct SeqQuery SeqConse SeqSbjct
            foreach I [NombresEntre $DebutQuery $FinQuery] {
                incr TaillePilier($I)
            }
        }
        lappend LesCouvert [join $LaLigne ";"]
    }

    set Max 0
    foreach I [NombresEntre 1 $QL] {
        set Max [Maxi $Max $TaillePilier($I)]
    }
    set LesPiliers {}
    foreach I [NombresEntre 1 $QL] {
        set T $TaillePilier($I)
        #rR Scaled to 200 stars for the highest pilier. Without any hit all
        #rR piliers are 0 and shown with 1 star, without dividing by zero
        set L [expr {$Max > 0 ? (200*$T)/$Max + 1 : 1}]
        Espionne [format %4d $I] [string repeat "*" $L]
        lappend LesPiliers "$I;$T"
    }

    if {$CouOut!=""} { SauveLesLignes $LesCouvert dans $CouOut }
    if {$PilOut!=""} { SauveLesLignes $LesPiliers dans $PilOut }
    return [list $CouOut $PilOut]
}
proc TBA {} {
    #rR Test of BlastAli on blast_Ark_Topo6B.txt with a fixed list of accesses.
    #rR Returns what BlastAli returns.
    set LesAC [list \
        WP_011223034.1 RLE46311.1 WP_048156073.1 WP_048120871.1 \
        WP_116481489.1 WP_015054741.1 PKL53186.1 KXS40906.1 \
        WP_011307748.1 WP_048117745.1 ABK14390.1 WP_048124872.1 \
        WP_011021597.1 WP_048039201.1 WP_048038741.1 WP_048171932.1 \
        WP_048182357.1 WP_048107223.1 WP_048195132.1 WP_048205782.1 \
        WP_095645014.1 WP_048137421.1]
    set Resultat [BlastAli blast_Ark_Topo6B.txt $LesAC]
    return $Resultat
}
proc BlastAliComprime {Fichier} {
    #rR Compresses the alignment of the TFA file Fichier : when a column holds
    #rR a residue in one sequence only, and this sequence has a gap further to
    #rR the right, the residues of this sequence are shifted by one to the right
    #rR up to this gap and the emptied column is removed from all sequences.
    #rR The result is saved in a file named as Fichier with _new.tfa in place
    #rR of .tfa. Returns this file name.
    #rR   T(i,j)            character of sequence i in column j
    #rR   TaillePilier(j)   number of sequences with a residue in column j
    #rR   LesIduNonVide(j)  the sequences with a residue in column j
    set I -1
    set LesAccess [LaSequenceDuTFAs $Fichier "LaListeDesAccess"]
    foreach Access $LesAccess {
        incr I
        set Tfa [LaSequenceDuTFAs $Fichier $Access]
        set Seq [QueLaSequenceDuTexteTFA $Tfa]
        set J -1
        foreach C [split $Seq ""] {
            incr J
            set T($I,$J) $C
            if {$C ne "-"} { lappend LesIduNonVide($J) $I; incr TaillePilier($J) }
        }
    }
    #rR The last column is the one of the last sequence read
    set DernierJ $J

    foreach J [NombresEntre 0 $DernierJ] {
        #rR Only the columns holding exactly one residue are candidates
        if { ! [info exists TaillePilier($J)] || $TaillePilier($J)>1} { continue }
        set I [lindex $LesIduNonVide($J) 0]
        set K $J
        set KVide ""
        set Comprime 0
        while {$K<$DernierJ} {
            incr K
            if {$T($I,$K) eq "-"} {
                set KVide $K
                #rR Shift to the right, from the gap back to column J. The
                #rR target name is built before K is decremented, the value
                #rR is read after
                while {$K > $J} {
                    set T($I,$K) $T($I,[incr K -1])
                    Espionne T($I,$K) $T($I,$K)
                }
                set T($I,$J) "-"
                set Comprime 1
                break
            }
        }
        if {$Comprime} {
            #rR Column KVide got one more residue, from sequence I. It has to
            #rR be recorded in both tables : if KVide was empty before, it is
            #rR now a candidate and its sequence is read in LesIduNonVide
            incr TaillePilier($KVide)
            lappend LesIduNonVide($KVide) $I
            set JComprime($J) $I
        }
    }

    set LeTFAs {}
    set I -1
    foreach Access $LesAccess {
        incr I
        set LaSeq {}
        foreach J [NombresEntre 0 $DernierJ] {
            if {[info exists JComprime($J)]} { continue }
            lappend LaSeq $T($I,$J)
        }
        set Seq [join $LaSeq ""]
        set SeqOri [QueLaSequenceDuTexteTFA [LaSequenceDuTFAs $Fichier $Access]]
        #rR Switched off : shows the sequences which have been changed
        if {0 && $SeqOri ne $Seq} {
            Espionne
            Espionne $Access
            Espionne [QueLaSequenceDuTexteTFA [LaSequenceDuTFAs $Fichier $Access]]
            Espionne $Seq
        }
        lappend LeTFAs [SequenceFormatTFA $Seq $Access "protbrut"]
    }

    #rR The dot is escaped : only a real .tfa extension is removed
    set New $Fichier
    regsub {\.tfa$} $New "" New
    append New "_new.tfa"
    SauveLesLignes $LeTFAs dans $New
    return $New
}
proc BlastAli {Fichier {Nieme ""} {SeuilExpect ""} {MaxListe ""}} {
    #rR Builds a query-anchored multiple alignment from the HSPs stored in BlastIndel
    #rR and saves it as a TFA file. Returns whatever SauveLesLignes returns.
    #rR   Fichier     : either a PAB name (then the blastOri, memo and tfaFromBOri
    #rR                 GscopeFiles are used) or the name of a blast file (the memo
    #rR                 and tfa file names are then derived from it)
    #rR   Nieme       : forwarded to BlastIndel. If it contains ShowStats or DisplayStats
    #rR                 (case insensitive) it is reset to "" and only the per-position
    #rR                 gap statistics are produced (nothing is written).
    #rR   SeuilExpect : forwarded to BlastIndel
    #rR   MaxListe    : forwarded to BlastIndel
    #rR The BlastIndel array is cached in the memo file and reloaded from it when it exists.
    global BlastIndel

    set DisplayStats 0
    set ShowStats 0
    if {[regexp -nocase "ShowStats|DisplayStats" $Nieme]} {
        if {[regexp -nocase "DisplayStats" $Nieme]} { set DisplayStats 1 }
        set ShowStats 1
        set Nieme ""
    }

    if {[EstUnPAB $Fichier]} {
        set Nom $Fichier
        set Fichier [GscopeFile $Nom "blastOri"]
        set Memo [GscopeFile $Nom "memo"]
        set FichierTfa [GscopeFile $Nom "tfaFromBOri"]
    } else {
        # Nom is used below for the DisplayStats title; it used to be undefined here.
        set Nom [file tail $Fichier]
        set Memo "${Fichier}_[join $Nieme _].memo"
        set FichierTfa $Fichier
        regsub {.txt$} $FichierTfa "" FichierTfa
        append FichierTfa .tfa
    }

    if {[file exists $Memo]} {
        array set BlastIndel [ContenuDuFichier $Memo]
    } else {
        BlastIndel $Fichier $Nieme $SeuilExpect $MaxListe
        Sauve [array get BlastIndel] dans $Memo
    }

    # Dummy element: guarantees that GapApres exists as an array for array names below.
    set GapApres(trululu) ""

    # Debugging aid: the pattern is a placeholder that matches nothing.
    foreach K [array names BlastIndel "wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww"] {
        Espionne BlastIndel($K)=$BlastIndel($K)=
    }
    #exit

    # AVoir is the access to trace in detail (debugging); the current value matches nothing.
    set AVoir "WP_049995565.1==================="

    # First pass: walk every HSP of every access, column by column.
    #   IQ / IS : number of query / subject residues consumed so far
    #   J       : running column index, starting at the query start of the HSP
    #   GapApres(Q,AC,IQ) : number of subject residues facing a gap after query residue IQ
    #   SousQuery(IQ)        : list of "AC IS" aligned on query residue IQ
    #   SousQueryEtendu(IQ)  : list of "AC IS" inserted after query residue IQ
    foreach AC $BlastIndel(ListOf,AC) {
        set MontreAC 0
        if {$AC==$AVoir} { set MontreAC 1 }
        #Espionne
        #Espionne $BlastIndel($AC,AC) $BlastIndel($AC,OS) $BlastIndel($AC,LE) $BlastIndel($AC,DE)
        set IemeH 0
        foreach H $BlastIndel($AC,LH) {
            incr IemeH
            lassign $H Access nCops nCopsDansCetteSequence OS Score Expect DebutQuery FinQuery DebutSbjct FinSbjct SeqQuery SeqConse SeqSbjct
            set IQ [expr {$DebutQuery-1}]
            set IS [expr {$DebutSbjct-1}]
            set J  [expr {$IQ-1}]
            if {$MontreAC} {
                Espionne AC $AC
                EspionneL $H
                Espionne IQ=$IQ IS=$IS
                Espionne SeqQuery
                Espionne =$SeqQuery=
                Espionne SeqSbjct
                Espionne =$SeqSbjct=
            }
            foreach AQ [split $SeqQuery ""] AS [split $SeqSbjct ""] {
                incr J
                if {$AQ=="-" && $AS=="-"} {
                    set GapAt(Query,$J) 1
                    set GapAt($Access,$J) 1
                    #incr GapApres(Q,$AC,$IQ)
                    incr GapApres(S,$AC,$IS)
                    continue
                }
                if {$AQ=="-" && $AS!="-"} {
                    set GapAt(Query,$J) 1
                    incr IS
                    incr GapApres(Q,$AC,$IQ)
                    lappend SousQueryEtendu($IQ) "$AC $IS"
                    #Espionne SousQueryEtendu($IQ) "$AC $IS $GapApres(Q,$AC,$IQ)"
                    continue
                }
                if {$AQ!="-" && $AS=="-"} {
                    set GapAt($Access,$J) 1
                    incr IQ
                    incr GapApres(S,$AC,$IS)
                    continue
                }
                if {$AQ!="-" && $AS!="-"} {
                    incr IQ
                    incr IS
                    lappend SousQuery($IQ) "$AC $IS"
                    continue
                }
            }
        }
    }

    # Debugging aid: show the gap flags of the traced access.
    # (array names takes the array name and the pattern as two separate arguments,
    #  and the loop variable is Key.)
    foreach Key [array names GapAt "*$AVoir*"] {
        Espionne GapAt($Key) $GapAt($Key)
    }
    #exit

    # Second pass: build the gapped query. After each query position the largest
    # insertion seen in any subject is reserved as a run of dashes.
    # NOTE(review): with IQ running from 0 and "string index $Seq $IQ-1" the last
    # query residue seems never to be appended (assuming NombresEntre is inclusive)
    # - to be confirmed before changing anything.
    set LengthWithGap $BlastIndel(Query,LE)
    set SeqGap ""
    set Seq $BlastIndel(Query,SQ)
    set LesStats {}
    foreach IQ [NombresEntre 0 [expr {$BlastIndel(Query,LE)-1}]] {
        set MaxGap 0
        foreach K [array names GapApres -glob "Q,*,$IQ"] {
            set MaxGap [Maxi $MaxGap $GapApres($K)]
            #Espionne $IQ GapApres($K) $GapApres($K) $MaxGap
        }
        if {$ShowStats} {
            set Message "[format {%4d %3d} $IQ $MaxGap] [string repeat * $MaxGap]"
            lappend LesStats $Message
            Espionne $Message
        }
        incr LengthWithGap $MaxGap
        append SeqGap [string index $Seq $IQ-1]
        append SeqGap [string repeat "-" $MaxGap]
    }
    if {$DisplayStats} { return [AfficheListe $LesStats "" "Couvert/$Nom"] }
    if {$ShowStats}    { return "FinShowStats" }

    #parray SousQuery
    #parray GapApres
    #Espionne $LengthWithGap

    # Third pass: place every subject residue in its column Ali(AC,J).
    set IQ 0
    set J 0
    foreach Q [split $SeqGap ""] {
        set Ali(Query,$J) $Q
        if {$Q!="-"} { incr IQ }
        if {[info exists SousQuery($IQ)]} {
            foreach ACPos $SousQuery($IQ) {
                scan $ACPos "%s %s" AC Pos
                set Ali($AC,$J) [string index $BlastIndel($AC,SS) $Pos-1]
            }
            # Consumed once: the following dash columns share the same IQ.
            unset SousQuery($IQ)
        }
        if {[info exists SousQueryEtendu($IQ)]} {
            if {[info exists iDe]} { unset iDe }
            foreach ACPos $SousQueryEtendu($IQ) {
                scan $ACPos "%s %s" AC Pos
                # iDe(AC) is the offset of this inserted residue after column J.
                incr iDe($AC)
                set Ali($AC,[expr {$J+$iDe($AC)}]) [string index $BlastIndel($AC,SS) $Pos-1]
            }
            unset SousQueryEtendu($IQ)
        }
        incr J
    }

    # Last pass: write one TFA entry per sequence. Columns without residue become
    # "-" (inside or after a gap) or "." (not covered), and "." is finally turned
    # into "-" while "?" (unknown query residue) is turned into "X".
    set LesLignes {}
    foreach AC [concat [list Query] $BlastIndel(ListOf,AC)] {
        set MontreAC 0
        set Previous ""
        set Ligne ""
        set IS 0
        foreach J [NombresEntre 0 [expr {$LengthWithGap-1}]] {
            if {$MontreAC} { Espionne $AC $J info exists Ali($AC,$J) [info exists Ali($AC,$J)] [info exists GapAt($AC,$J)] }
            if {[info exists Ali($AC,$J)]} {
                append Ligne $Ali($AC,$J)
                set Previous $Ali($AC,$J)
                incr IS
            } elseif {$Previous=="-"} {
                #Espionne 2 $J -
                append Ligne "-"
                set Previous "-"
            } elseif {[info exists GapAt($AC,$J)]} {
                #Espionne 3 $J -
                append Ligne "-"
                set Previous "-"
            } else {
                #Espionne 4 $J .
                append Ligne .
                set Previous .
            }
        }
        #Espionne [string range $Ligne 0 250]
        lappend LesLignes ">$AC"
        regsub -all {\.} $Ligne "-" Ligne
        regsub -all {\?} $Ligne "X" Ligne
        lappend LesLignes $Ligne
    }
    return [SauveLesLignes $LesLignes dans $FichierTfa]
}
proc FromBlastIndel {{Qui ""} {Quoi ""} args} {
    #rR Accessor on the global BlastIndel array: returns BlastIndel(Qui,Quoi) or "".
    #rR Warning: when args is not empty the whole array is dropped and rebuilt by
    #rR calling BlastIndel with these args before the lookup is done.
    global BlastIndel

    if {[llength $args] > 0} {
        unset -nocomplain BlastIndel
        BlastIndel {*}$args
        return [FromBlastIndel $Qui $Quoi]
    }

    set Cle "$Qui,$Quoi"
    if {[info exists BlastIndel($Cle)]} {
        return $BlastIndel($Cle)
    }

    # Unknown key: raise the "EstCharge" flag the first time, then answer "".
    if { ! [info exists BlastIndel("EstCharge")]} {
        set BlastIndel("EstCharge") 1
        return [FromBlastIndel $Qui $Quoi]
    }
    return ""
}
proc BlastIndel {Fichier {Nieme ""} {SeuilExpect ""} {MaxListe ""} {Qui ""} {Quoi ""}} {
    #rR Parses the blast output Fichier and (re)fills the global array BlastIndel :
    #rR   BlastIndel(ListOf,AC)            list of the accesses that were kept
    #rR   BlastIndel(Access,AC|PN|DE|OS|SS|LE)  per access information
    #rR   BlastIndel(Access,LH)            list of the HSPs kept for this access, each one
    #rR                                    trimmed of the columns already used by a
    #rR                                    previous HSP of the same hit
    #rR   BlastIndel(Query,LE|SQ)          query length and query sequence
    #rR   Nieme : if it is a number only the Nieme-th hit is kept (and MaxListe is set
    #rR           to it); if it contains a letter it is a list of BanqueId to keep.
    #rR   Qui Quoi : if Qui is not empty BlastIndel(Qui,Quoi) is returned,
    #rR              otherwise the string BlastIndel is returned.
    global BlastIndel
    if {[info exists BlastIndel]} { unset BlastIndel } ; #rR 2019/01/21

    set NiemeEstUnNombre [regexp {^[0-9]+$} $Nieme]
    set NiemeEstUnAccess [regexp -nocase {[a-z]} $Nieme]
    if {$NiemeEstUnNombre} { set MaxListe $Nieme }

    set NbHits [DecortiqueBlast $Fichier $SeuilExpect $MaxListe Query lBanqueId lAccess lDE lProfil lPN lPartieSegAli]

    set Tout [ContenuDuFichier $Fichier]
    set QueryLength [IntegerApres "Length=" dans $Tout]
    if {$QueryLength=="" && ! [regexp {\(([0-9]+) letters\)} $Tout Match QueryLength]} {
        FaireLire "I cannot find the QueryLength"
    }
    Espionne $QueryLength

    # The query sequence is unknown at start; LectureSegAli returns it updated.
    set SQ [string repeat "?" $QueryLength]

    set N 0
    foreach BId $lBanqueId PN $lPN SegAli $lPartieSegAli {
        incr N
        #Espionne BId $BId $N/$NbHits $Nieme $NiemeEstUnNombre $NiemeEstUnAccess
        if {$NiemeEstUnNombre && $N!=$Nieme} { continue }
        if {$NiemeEstUnAccess && [lsearch $Nieme $BId]<0} { continue }
        #EspionneL $SegAli

        # Header of the hit: access, definition and, when available, organism.
        set Access ""
        set Definition ""
        set OS "Unknown unknown"
        set Entete [join $SegAli " "]
        regsub {\n Score = .+$} $Entete "" Entete
        regsub -all { +} $Entete " " Entete
        set Entete [string trim $Entete]
        if {[regexp {>([^ ]+) ([^\[]+)\[([^\]]+)\]} $Entete Match Access Definition OS]} {
            # >Access Definition [Organism]
        } elseif {[regexp {>([^ ]+) RecName: Full=(.+) AltName:} $Entete Match Access Definition]} {
            # >Access RecName: Full=Definition AltName: ...
        } elseif {[regexp {>([^ ]+) (.+)$} [lindex $SegAli 0] Match Access Definition]} {
            # Fallback on the first line only
        } else {
            Espionne $Entete; FaireLire "Ca merde pour $BId"; continue
        }
        set Access     [string trim $Access]
        set Definition [string trim $Definition]
        set OS         [string trim $OS]

        set Length -1
        set SS ""
        lappend SegAli "FinSegAli" "FinSegAli" "FinSegAli"
        lassign [LectureSegAli $Access $SQ $OS $SegAli] SQ SS Length ListeDesHomologues

        # DejaVu...En remember, for the current hit, the query and subject positions
        # already covered by a previous HSP.
        unset -nocomplain DejaVuQueryEn
        unset -nocomplain DejaVuSbjctEn

        foreach H $ListeDesHomologues {
            lassign $H Access nCops nCopsDansCetteSequence OS Score Expect DebutQuery FinQuery DebutSbjct FinSbjct SeqQuery SeqConse SeqSbjct
            set SeqQueryGarde ""
            set SeqConseGarde ""
            set SeqSbjctGarde ""
            set iQueryCourant [expr {$DebutQuery-1}]
            set iSbjctCourant [expr {$DebutSbjct-1}]
            unset -nocomplain DebutQueryGarde
            unset -nocomplain DebutSbjctGarde
            set OnGarde 0
            unset -nocomplain DejaVuQueryLocalEn
            unset -nocomplain DejaVuSbjctLocalEn

            foreach RQ [split $SeqQuery ""] RC [split $SeqConse ""] RS [split $SeqSbjct ""] {
                if {$RQ!="-"} {
                    incr iQueryCourant
                    if { ! [info exists DebutQueryGarde]} {
                        set DebutQueryGarde $iQueryCourant
                        set OnGarde 1
                    }
                    set FinQueryGarde $iQueryCourant
                }
                if {$RS!="-"} {
                    incr iSbjctCourant
                    if { ! [info exists DebutSbjctGarde]} {
                        set DebutSbjctGarde $iSbjctCourant
                        set OnGarde 1
                    }
                    set FinSbjctGarde $iSbjctCourant
                }
                # Skip the columns whose query or subject position was used by a previous HSP.
                if {[info exists DejaVuQueryEn($iQueryCourant)] || [info exists DejaVuSbjctEn($iSbjctCourant)]} {
                    continue
                }
                #Espionne =$RQ=$RS=$OnGarde=
                if {$OnGarde} {
                    append SeqQueryGarde $RQ
                    append SeqConseGarde $RC
                    append SeqSbjctGarde $RS
                }
                set DejaVuQueryLocalEn($iQueryCourant) 1
                # Was spelled DejaVuSbjctLOcalEn, so the subject positions were never recorded.
                set DejaVuSbjctLocalEn($iSbjctCourant) 1
                set FinQueryGarde $iQueryCourant
                set FinSbjctGarde $iSbjctCourant
            }

            set HomologieGarde [list $Access $nCops $nCopsDansCetteSequence $OS $Score $Expect $DebutQueryGarde $FinQueryGarde $DebutSbjctGarde $FinSbjctGarde $SeqQueryGarde $SeqConseGarde $SeqSbjctGarde]
            lappend BlastIndel($Access,LH) $HomologieGarde

            # The positions of this HSP become forbidden for the next HSPs of the hit.
            foreach I [array names DejaVuQueryLocalEn] { set DejaVuQueryEn($I) 1 }
            foreach I [array names DejaVuSbjctLocalEn] { set DejaVuSbjctEn($I) 1 }
        }

        lappend BlastIndel(ListOf,AC) $Access
        set BlastIndel($Access,AC) $Access
        set BlastIndel($Access,PN) $PN
        set BlastIndel($Access,DE) $Definition
        set BlastIndel($Access,OS) $OS
        set BlastIndel($Access,SS) $SS
        set BlastIndel($Access,LE) $Length
    }

    set BlastIndel(Query,LE) $QueryLength
    set BlastIndel(Query,SQ) $SQ
    if {$Qui!=""} { return $BlastIndel($Qui,$Quoi) }
    return "BlastIndel"
}
#rR When OnTraite answers true for CilioCarta, gscope_blome.tcl is sourced as well.
if {[OnTraite CilioCarta]} { source /home/ripp/gscope/gscope_blome.tcl }
proc BlastomicsDir {} {
    #rR Root directory of blastomics (hard-coded).
    set Racine "/home/ripp/www/blastomics"
    return $Racine
}
proc BlastomicsDbDir {} {
    #rR Directory holding the blastomics database files: the db subdirectory of BlastomicsDir.
    set Racine [BlastomicsDir]
    return "$Racine/db"
}
proc TestBlastomicsSql {Projet Phylum} {
    #rR Test procedure: runs a count-per-organism query on the database
    #rR Projet_Phylum.db of BlastomicsDbDir, shows the result, then exits the program.
    set Sql [join [list \
        "SELECT COUNT(ln_organism_taxobla.pk_taxobla), specie, taxid, ln_organism_taxobla.pk_organism, clade.nom " \
        "FROM ln_organism_taxobla join taxobla ON taxobla.pk_taxobla =ln_organism_taxobla.pk_taxobla " \
        "JOIN organism ON organism.pk_organism= ln_organism_taxobla.pk_organism " \
        "JOIN ln_clade_organism ON ln_clade_organism.pk_organism = organism.pk_organism " \
        "JOIN clade ON clade.pk_clade=ln_clade_organism.pk_clade " \
        "GROUP BY organism.pk_organism ORDER BY clade.pk_clade" \
    ] " "]
    set FichierDb "[BlastomicsDbDir]/${Projet}_${Phylum}.db"
    EspionneL [SqlExecForDatabase $FichierDb $Sql GetList]
    exit
}
proc BlastomicsCladesClaudineNeSertPlus {} {
    #rR Warning: there are similar things in gscope_oi ...
    #rR Shows the clades of AHalma_Archaea.db (TaxId, name, class), then every
    #rR OX with its OS known by OiCode once OiDomain is set to Archaea.
    set FichierDb "[BlastomicsDbDir]/AHalma_Archaea.db"
    set LesNomsDeClade [SqlExecForDatabase $FichierDb "select nom from clade order by nom" "GetList"]
    foreach NomDuPhylum [OrderedClades $LesNomsDeClade] {
        set TaxIdDuPhylum [Tax $NomDuPhylum TaxId]
        set ClasseDuPhylum [TaxClass $TaxIdDuPhylum Name]
        Espionne [format "%10d %-30s %s" $TaxIdDuPhylum $NomDuPhylum $ClasseDuPhylum]
    }

    OiDomain Archaea
    foreach OX [OiCode ListOf OX] {
        Espionne "$OX [OiCode $OX OS]"
    }
}
proc CanalSqlTaxobla {{Bdd ""}} {
    #rR Makes sure the current sql channel is connected to the database Bdd and
    #rR returns the channel. A Bdd that is not an existing file is searched in BlastomicsDbDir.
    set ACompleter [expr {$Bdd!="" && [FileAbsent $Bdd]}]
    if {$ACompleter} { set Bdd "[BlastomicsDbDir]/$Bdd" }
    Espionne $Bdd

    set NomCourt [file tail $Bdd]
    set BaseCourante "[CanalSql "GetDbname"].db"
    if {$BaseCourante == $NomCourt} {
        # Already connected to the wanted database
        return [CanalSql]
    }

    # Another database is connected: show both names and reconnect.
    Espionne [CanalSql "GetDbname"]
    Espionne $NomCourt
    CanalSqlDisconnect
    CanalSql [ConnInfoForDatabase $Bdd]
    return [CanalSql]
}
proc BlastomicsFilterTaxobla {{Bdd ""} {ListOfPkOrg ""} {ListOfNot ""} {ListOfCladeCounts ""}} {
    #rR Selects the taxobla rows of the database Bdd that satisfy the given filters.
    #rR Warning: this procedure calls itself to resolve the Not list.
    #rR   ListOfPkOrg       : pk_organism values (any non-digit separator); a row must
    #rR                       hit all of them (or any of them in UseUnion mode).
    #rR                       "" or "-" means all rows.
    #rR   ListOfNot         : pk_organism values whose rows are removed ("" or "-" for none).
    #rR                       The special value GetListOfPk makes the procedure return
    #rR                       the bare list of pk_taxobla (used by the recursive call).
    #rR   ListOfCladeCounts : comma separated Clade/From-To (or Clade/Count) entries
    #rR                       restricting on branche.id_branche, or the keyword UseUnion.
    #rR Returns "Pks/GscopeIds/GeneIds/CladeCounts", each part being space separated.
    #rR The clade names and the bounds come from the caller and are put into the sql
    #rR text: the names are quote-escaped and the bounds must be plain numbers.
    #rR Malformed entries (and the UseUnion keyword itself) are logged and ignored
    #rR instead of producing a broken sql statement.
    CanalSqlTaxobla $Bdd
    set UseUnion [string equal -nocase $ListOfCladeCounts "UseUnion"]
    LogWscope $ListOfPkOrg
    LogWscope $ListOfNot
    if {$ListOfPkOrg=="-"} { set ListOfPkOrg "" }
    if {$ListOfNot  =="-"} { set ListOfNot "" }

    # Whatever the separators are, keep the digits only (this also protects the sql below).
    if {[regexp {[^0-9]} $ListOfPkOrg]} {
        regsub -all {[^0-9]+} $ListOfPkOrg "," ListOfPkOrg
        set ListOfPkOrg [split [string trim $ListOfPkOrg ","] ","]
    }

    if {$ListOfPkOrg==""} {
        set LesPkTot [SqlExec "select pk_taxobla from taxobla" "GetList"]
    } else {
        set PremiereFois 1
        foreach PkO $ListOfPkOrg {
            set Sql "select pk_taxobla from ln_organism_taxobla where pk_organism==$PkO"
            set LesPkT [SqlExec $Sql "GetList"]
            if {$PremiereFois} {
                set LesPkTot $LesPkT
                set PremiereFois 0
            } elseif {$UseUnion} {
                set LesPkTot [ListsUnion $LesPkTot $LesPkT]
            } else {
                set LesPkTot [ListsIntersection $LesPkTot $LesPkT]
            }
        }
    }
    if {$ListOfNot=="GetListOfPk"} { return $LesPkTot }
    if {$ListOfNot!=""} {
        set LesNot [BlastomicsFilterTaxobla $Bdd $ListOfNot "GetListOfPk" "UseUnion"]
        set LesPkTot [ListsComplement $LesPkTot $LesNot]
    }

    #rR An earlier version returned LesPkTot here when ListOfCladeCounts was empty.
    #rR We suppose that no Absence is checked when a number of species is asked for,
    #rR so we can filter on the wanted number.
    #rR (An earlier variant of the loop below was keyed on pk_clade instead of the clade name.)
    set LesNbHitsDesClades {}
    foreach CC [split $ListOfCladeCounts ","] {
        LogWscope $CC
        set CountText ""
        lassign [split $CC "/"] Clade CountText
        LogWscope "$Clade $CountText"
        set LesBornes [split $CountText "-"]
        set D [lindex $LesBornes 0]
        set F [lindex $LesBornes end]
        if { ! [regexp {^\s*[0-9]+\s*$} $D] || ! [regexp {^\s*[0-9]+\s*$} $F]} {
            LogWscope "BlastomicsFilterTaxobla ignores the malformed clade count =$CC="
            continue
        }
        # A single quote is doubled so that the name stays inside its sql string literal.
        set CladeSql [string map {' ''} $Clade]
        set LeSql {}
        lappend LeSql "select ln_branche_taxobla.pk_taxobla from ln_branche_taxobla, branche, sapin, clade "
        lappend LeSql " where "
        lappend LeSql " '$CladeSql' = clade.nom "
        lappend LeSql " and clade.pk_clade = sapin.pk_clade "
        lappend LeSql " and sapin.pk_sapin = branche.pk_sapin "
        lappend LeSql " and branche.pk_branche = ln_branche_taxobla.pk_branche "
        lappend LeSql " and branche.id_branche >= $D and branche.id_branche <= $F"
        set Sql [join $LeSql " "]
        regsub -all { +} $Sql " " Sql
        LogWscope $Sql
        set LesBonsNb [SqlExec $Sql "GetList"]
        set Nb [llength $LesBonsNb]
        LogWscope "$Nb trouves"
        lappend LesNbHitsDesClades "$Clade=$D-$F"
        set LesPkTot [ListsIntersection $LesPkTot $LesBonsNb]
    }

    # Fetch the identifiers of the rows that passed all the filters.
    set LesPk {}
    set LesGscopeId {}
    set LesGeneId {}
    set Sql "select pk_taxobla, gscopeid, gene_id from taxobla where pk_taxobla in ([join $LesPkTot ,])"
    LogWscope $Sql
    foreach {Pk GscopeId GeneId} [SqlExec $Sql "GetList"] {
        lappend LesPk $Pk
        lappend LesGscopeId $GscopeId
        if {$GeneId!=""} { lappend LesGeneId $GeneId }
    }
    set Pks [join $LesPk " "]
    set Noms [join $LesGscopeId " "]
    set GeneIds [join $LesGeneId " "]
    set NbHitsDesClades [join $LesNbHitsDesClades " "]
    set Resultat "$Pks/$Noms/$GeneIds/$NbHitsDesClades"
    return $Resultat
}
proc CladeContentWithinOi2017 {Clade {KindOfClades ""}} {
    #rR Returns the TaxIds of Clade (as defined by the KindOfClades family) that are
    #rR also known by OiCode. WATCH OUT FOR THE DOMAIN ... still to be done.
    #rR Without KindOfClades it is deduced from OiDomain (Archaea, or CMC for Eukaryota).
    JeMeSignale
    if {$KindOfClades eq ""} {
        if {[OiDomain] eq "Archaea"}   { set KindOfClades "Archaea" }
        if {[OiDomain] eq "Eukaryota"} { set KindOfClades "CMC" } ; #rR to be corrected later !!!!!!!!!!!
    }
    Espionne "OiDomain =[OiDomain]= KindOfClades =$KindOfClades="

    # Domaine is only needed by the OiCodeForDomain variant kept in comment below.
    switch -exact -- $KindOfClades {
        "CMC" {
            set Domaine "Eukaryota"
            set LesTaxId [CMC $Clade OX]
        }
        "JoyFungi" {
            set Domaine "Eukaryota"
            set LesTaxId [Paraph $KindOfClades $Clade OX]
        }
        "Bac8" -
        "Bac9" {
            set Domaine "Bacteria"
            set LesTaxId [Paraph $KindOfClades $Clade OX]
        }
        "TTLL" {
            set Domaine "Eukaryota"
            set LesTaxId [TTLL $Clade OX]
        }
        "Archaea" {
            set Domaine "Archaea"
            set LesTaxId [PhylAr $Clade ListOfOX]
        }
        default {
            set Domaine "Eukaryota"
            set LesTaxId [CladeContent $Clade]
        }
    }

    set LesBons {}
    foreach TaxId $LesTaxId {
        #if {[OiCodeForDomain $Domaine $TaxId OI]==""} { continue }
        if {[OiCode $TaxId OI] ne ""} { lappend LesBons $TaxId }
    }
    return $LesBons
}
proc BlastomicsCreateDb {{Project ""} {KindOfClades ""} {GetWhat ""}} {
#rR Creates the empty database Project_KindOfClades.db in BlastomicsDbDir with its 8 tables.
#rR   Project      : defaults to the tail of RepertoireDuGenome
#rR   KindOfClades : second part of the database file name
#rR   GetWhat      : GetHandleAndFilename returns the list (sql handle, file name) and
#rR                  GetHandle returns the sql handle, both leaving the connection open.
#rR                  Otherwise the connection is closed and the file name is returned.
#rR Returns "Error: Already exists" when the file exists and the user refuses to overwrite it.
if {$Project==""} { set Project [file tail [RepertoireDuGenome]] }
set Bdd "[BlastomicsDbDir]/${Project}_${KindOfClades}.db"
if {[FileExists $Bdd]} {
if {[OuiOuNon "$Bdd already exists. Do I overwrite ?"]} {
# Garde is called on the old file before it is deleted (presumably a backup - to be confirmed).
Garde $Bdd
file delete $Bdd
} else {
return "Error: Already exists"
}
}
Espionne [CanalSql [ConnInfoForDatabase $Bdd "CreateIfNotExists"]]
# Everything below is done within one transaction.
SqlExec "BEGIN TRANSACTION"
SqlExec "DROP TABLE IF EXISTS ln_clade_organism"
SqlExec "DROP TABLE IF EXISTS ln_organism_taxobla"
SqlExec "DROP TABLE IF EXISTS ln_branche_taxobla"
SqlExec "DROP TABLE IF EXISTS branche"
SqlExec "DROP TABLE IF EXISTS sapin"
SqlExec "DROP TABLE IF EXISTS organism"
SqlExec "DROP TABLE IF EXISTS taxobla"
SqlExec "DROP TABLE IF EXISTS clade"
# Active schema: plain tables, without primary or foreign key.
SqlExec "CREATE TABLE clade (pk_clade INTEGER , nom VARCHAR(15))"
SqlExec "CREATE TABLE organism(pk_organism INTEGER , taxid VARCHAR(10),
specie VARCHAR(25))"
SqlExec "CREATE TABLE taxobla (pk_taxobla INTEGER , gscopeid VARCHAR(10),
chemin VARCHAR(20),
uniprotaccess VARCHAR(12),
uniprotid VARCHAR(15),
gene_name VARCHAR(20),
gene_id VARCHAR(20))"
SqlExec "CREATE TABLE branche (pk_branche INTEGER , pk_sapin INTEGER,
id_branche INTEGER,
largeur INTEGER)"
# The ln_ tables are the link tables between two of the tables above.
SqlExec "CREATE TABLE ln_clade_organism (pk_clade INTEGER,
pk_organism INTEGER)"
SqlExec "CREATE TABLE ln_organism_taxobla (pk_taxobla INTEGER,
pk_organism INTEGER)"
SqlExec "CREATE TABLE ln_branche_taxobla (pk_branche INTEGER,
pk_taxobla INTEGER)"
SqlExec "CREATE TABLE sapin (pk_sapin INTEGER,
pk_clade INTEGER)"
# Disabled variant of the same schema, with autoincremented primary keys and foreign keys.
if {0} {
SqlExec "CREATE TABLE clade (pk_clade INTEGER PRIMARY KEY AUTOINCREMENT, nom VARCHAR(15))"
SqlExec "CREATE TABLE organism(pk_organism INTEGER PRIMARY KEY AUTOINCREMENT, taxid VARCHAR(10),
specie VARCHAR(25))"
SqlExec "CREATE TABLE taxobla (pk_taxobla INTEGER PRIMARY KEY AUTOINCREMENT, gscopeid VARCHAR(10),
chemin VARCHAR(20),
uniprotaccess VARCHAR(12),
uniprotid VARCHAR(15),
gene_name VARCHAR(20),
gene_id VARCHAR(20))"
SqlExec "CREATE TABLE branche (pk_branche INTEGER PRIMARY KEY AUTOINCREMENT, pk_sapin INTEGER,
id_branche INTEGER,
largeur INTEGER,
FOREIGN KEY(pk_sapin) REFERENCES sapin(pk_sapin))"
SqlExec "CREATE TABLE ln_clade_organism (pk_clade INTEGER,
pk_organism INTEGER,
PRIMARY KEY(pk_clade, pk_organism),
FOREIGN KEY(pk_clade) REFERENCES clade(pk_clade),
FOREIGN KEY(pk_organism) REFERENCES organism(pk_organism))"
SqlExec "CREATE TABLE ln_organism_taxobla (pk_taxobla INTEGER,
pk_organism INTEGER,
PRIMARY KEY(pk_taxobla, pk_organism),
FOREIGN KEY(pk_organism) REFERENCES organism(pk_organism),
FOREIGN KEY(pk_taxobla) REFERENCES taxobla(pk_taxobla))"
SqlExec "CREATE TABLE ln_branche_taxobla (pk_branche INTEGER,
pk_taxobla INTEGER,
PRIMARY KEY(pk_branche, pk_taxobla),
FOREIGN KEY(pk_branche) REFERENCES branche(pk_branche),
FOREIGN KEY(pk_taxobla) REFERENCES taxobla(pk_taxobla))"
SqlExec "CREATE TABLE sapin (pk_sapin INTEGER PRIMARY KEY AUTOINCREMENT,
pk_clade INTEGER,
FOREIGN KEY(pk_clade) REFERENCES clade(pk_clade)) "
}
SqlExec "COMMIT"
# With a GetHandle... request the connection stays open for the caller.
if {$GetWhat=="GetHandleAndFilename"} { return [list [CanalSql] $Bdd] }
if {$GetWhat=="GetHandle" } { return [CanalSql] }
CanalSqlDisconnect
return $Bdd
}
proc BlastomicsNewQuery {{Project ""} {KindOfClades ""}} {
    #rR Creates the blastomics database of Project for the clade family KindOfClades
    #rR and fills it. One tab separated text file per table is written in the
    #rR blomesql directory of RepertoireDuGenome, then each file is loaded with the
    #rR copy command of the sql handle, and the indexes are created.
    #rR Returns the database file name, or the error message of BlastomicsCreateDb.
    #rR The table name is taken from the file name (between the dash and the dot).
    set Overwrite 1
    set RepSql "[RepertoireDuGenome]/blomesql"
    file mkdir $RepSql

    # On refusal BlastomicsCreateDb returns an error text instead of a 2-element list:
    # return that text as a whole (lassign used to cut it at its first word).
    set Creation [BlastomicsCreateDb $Project $KindOfClades "GetHandleAndFilename"]
    lassign $Creation SqlHandle Bdd
    if {[regexp "^Error" $SqlHandle]} { return $Creation }

    set LesFichiersACharger {}
    if {$Overwrite} {
        set TaxIdQuery [NotreOX]

        # Content of each clade, restricted to the organisms known by OiCode.
        set ListOfClades [lsort -unique [BlastomicsClades $KindOfClades]]
        set AllTaxId {}
        foreach Clade $ListOfClades {
            Espionne "Clade $Clade"
            set OrgsFromClade($Clade) [CladeContentWithinOi2017 $Clade $KindOfClades]
            Espionne $OrgsFromClade($Clade)
            set Cardinal($Clade) [llength $OrgsFromClade($Clade)]
            Espionne $Cardinal($Clade)
            foreach TaxId $OrgsFromClade($Clade) {
                lappend AllTaxId $TaxId
                set OsFrom($TaxId) [Tax $TaxId "Name"]
                lappend LesCladesDuTaxId($TaxId) $Clade
            }
        }
        set AllTaxId [lsort -unique -integer $AllTaxId]

        # Table clade : pk_clade, nom
        set PkClade 0
        set SqlPourClade {}
        foreach Clade $ListOfClades {
            incr PkClade
            lappend SqlPourClade "$PkClade\t$Clade"
            set PkDuClade($Clade) $PkClade
        }
        lappend LesFichiersACharger [SauveLesLignes $SqlPourClade dans "$RepSql/10_SqlPour-clade.txt"]

        # Table organism : pk_organism, taxid, specie
        set PkOrganism 0
        set SqlPourOrganism {}
        foreach TaxId [lsort -integer [array names OsFrom]] {
            set Species $OsFrom($TaxId)
            incr PkOrganism
            lappend SqlPourOrganism "$PkOrganism\t$TaxId\t$Species"
            set PkDuOrganismTaxId($TaxId) $PkOrganism
            set PkDuOrganismSpecies($Species) $PkOrganism
        }
        lappend LesFichiersACharger [SauveLesLignes $SqlPourOrganism dans "$RepSql/20_SqlPour-organism.txt"]

        # Table ln_clade_organism : pk_clade, pk_organism
        set SqlPourLnCladeOrganism {}
        foreach Clade $ListOfClades {
            foreach TaxId $OrgsFromClade($Clade) {
                set PkOrganism $PkDuOrganismTaxId($TaxId)
                lappend SqlPourLnCladeOrganism $PkDuClade($Clade)\t$PkOrganism
            }
        }
        lappend LesFichiersACharger [SauveLesLignes $SqlPourLnCladeOrganism dans "$RepSql/30_SqlPour-ln_clade_organism.txt"]

        # Table sapin : pk_sapin, pk_clade (one sapin per clade, with the same pk)
        set SqlPourSapin {}
        set LesPkSapin {}
        foreach Clade $ListOfClades {
            set PkSapin $PkDuClade($Clade) ;#rR I do not know whether there can be others ???
            lappend SqlPourSapin "$PkSapin\t$PkDuClade($Clade)"
            lappend LesPkSapin $PkSapin
            lappend LesCladesDuSapin($PkSapin) $Clade
        }
        lappend LesFichiersACharger [SauveLesLignes $SqlPourSapin dans "$RepSql/40_SqlPour-sapin.txt"]

        # Table branche : pk_branche, pk_sapin, id_branche, largeur
        # One branche per value given by NombresEntre 0 Largeur.
        set PkBranche 0
        set SqlPourBranche {}
        foreach PkSapin $LesPkSapin {
            foreach Clade $LesCladesDuSapin($PkSapin) {
                set Largeur [llength $OrgsFromClade($Clade)]
                foreach IdBranche [NombresEntre 0 $Largeur] {
                    incr PkBranche
                    lappend SqlPourBranche $PkBranche\t$PkSapin\t$IdBranche\t$Largeur
                    set PkBrancheDe($PkSapin,$IdBranche) $PkBranche
                }
            }
        }
        lappend LesFichiersACharger [SauveLesLignes $SqlPourBranche dans "$RepSql/50_SqlPour-branche.txt"]

        # Table taxobla : one row per PAB
        set PkTaxobla 0
        set SqlPourTaxobla {}
        foreach Nom [ListeDesPABs] {
            incr PkTaxobla
            set PkTaxoblaDe($Nom) $PkTaxobla
            set Ac [ExtraitInfo $Nom "AC:"]
            set Id [ExtraitInfo $Nom "ID:"]
            set Gn [ExtraitInfo $Nom "ValiGN:"] ; if {$Gn==""} { set Gn [ExtraitInfo $Nom "GN:"] }
            set Gi [ExtraitInfo $Nom "GI:"]     ; if {$Gi==""} { set Gi [NIAG $Nom "G"] }
            lappend SqlPourTaxobla "$PkTaxobla\t$Nom\ttaxobla\t$Ac\t$Id\t$Gn\t$Gi"
        }
        lappend LesFichiersACharger [SauveLesLignes $SqlPourTaxobla dans "$RepSql/60_SqlPour-taxobla.txt"]

        # Table ln_organism_taxobla, and the number of hits of each PAB in each clade.
        set SqlPourLnOrganismTaxobla {}
        foreach Nom [ListeDesPABs] {
            set PkTaxobla $PkTaxoblaDe($Nom)
            foreach Clade $ListOfClades {
                set PkClade $PkDuClade($Clade)
                set NbHitsDansClade($PkTaxobla,$PkClade) 0
            }
            if {[FileAbsent [GscopeFile $Nom "taxobla"]]} { continue }
            foreach Ligne [LesLignesDuFichier [GscopeFile $Nom "taxobla"]] {
                # A line that does not start with Pn Score TaxId is skipped; it used
                # to be processed with the values left by the previous line.
                if {[scan $Ligne "%f %f %d" Pn Score TaxId] != 3} { continue }
                # Stops at the first line above the threshold
                # (presumably the file is sorted by increasing Pn - to be confirmed).
                if {$Pn>0.001} { break }
                set Score [expr {round($Score)}]
                if { ! [info exists PkDuOrganismTaxId($TaxId)]} { continue }
                set PkOrganism $PkDuOrganismTaxId($TaxId)
                lappend SqlPourLnOrganismTaxobla "$PkTaxobla\t$PkOrganism"
                foreach Clade $LesCladesDuTaxId($TaxId) {
                    set PkClade $PkDuClade($Clade)
                    incr NbHitsDansClade($PkTaxobla,$PkClade)
                }
            }
        }
        lappend LesFichiersACharger [SauveLesLignes $SqlPourLnOrganismTaxobla dans "$RepSql/70_SqlPour-ln_organism_taxobla.txt"]

        # Table ln_branche_taxobla : the branche of a PAB in a sapin is the one whose
        # id_branche is its number of hits (pk_sapin and pk_clade are the same here).
        set SqlPourLnBrancheTaxobla {}
        foreach {K NbHits} [array get NbHitsDansClade] {
            lassign [split $K ","] PkTaxobla PkSapin
            if { ! [info exists PkBrancheDe($PkSapin,$NbHits)]} {
                Espionne "not existing PkBrancheDe($PkSapin,$NbHits) PkTaxobla $PkTaxobla"
                continue
            }
            set PkBranche $PkBrancheDe($PkSapin,$NbHits) ;#rR NbHits = IdBranche
            lappend SqlPourLnBrancheTaxobla "$PkBranche\t$PkTaxobla"
        }
        lappend LesFichiersACharger [SauveLesLignes $SqlPourLnBrancheTaxobla dans "$RepSql/80_SqlPour-ln_branche_taxobla.txt"]
    } else {
        # Reuse the files of a previous run (never reached while Overwrite is 1).
        set LesFichiersACharger [lsort [glob "$RepSql/*SqlPour-*"]]
    }

    foreach Fichier $LesFichiersACharger {
        set Queue [file tail $Fichier]
        if { ! [regexp {\-([^\.]+)\.} $Queue Match Table]} { continue }
        $SqlHandle copy abort $Table $Fichier "\t"
        Espionne "$Table done"
    }
    BlastomicsCreateIndex
    CanalSqlDisconnect
    return $Bdd
}
proc BlastomicsCreateIndex {{Bdd ""}} {
    #rR Creates one index per table of a blastomics database.
    #rR With Bdd the database is connected first, disconnected at the end and ""
    #rR is returned. Without Bdd the current connection is used and returned.
    set AvecConnexion [expr {$Bdd!=""}]
    if {$AvecConnexion} { CanalSql [ConnInfoForDatabase $Bdd] }

    # index name, table, indexed columns
    set LesIndex {
        iclade    clade               {pk_clade, nom}
        iorganism organism            {pk_organism, taxid, specie}
        itaxobla  taxobla             {pk_taxobla, gscopeid}
        ibranche  branche             {pk_branche}
        isapin    sapin               {pk_sapin, pk_clade}
        ilnco     ln_clade_organism   {pk_clade, pk_organism}
        ilnot     ln_organism_taxobla {pk_taxobla, pk_organism}
        ilbt      ln_branche_taxobla  {pk_branche, pk_taxobla}
    }
    foreach {Index Table Colonnes} $LesIndex {
        SqlExec "CREATE INDEX $Index ON $Table ($Colonnes)"
    }

    if {$AvecConnexion} {
        CanalSqlDisconnect
        return ""
    }
    return [CanalSql]
}
proc BlastomicsClades {{Quoi ""} {GetWhat ""}} {
    #rR Is called by the python Blame ... through question de science ...
    #rR Please keep me up to date
    #rR Returns the clades of the family Quoi, or the possible families when Quoi
    #rR contains Possible. With GetWhat set to GetList the result is split on blanks.
    #rR The Cilio clades win over anything else when OnTraite CilioCarta Like is true.
    set Possibles [lsort [list Archaea Bac8 Bac9 CMC Cilio TTLL JoyFungi]]

    set Clades {}
    if {[regexp -nocase "Possible" $Quoi]} { set Clades $Possibles }

    switch -exact -- $Quoi {
        "Archaea" { set Clades [PhylAr ListOf PhyOS] }
        "CMC"     { set Clades [CMC ListOf JoinedClade] }
        "TTLL"    { set Clades [TTLL ListOf JoinedClade] }
        "JoyFungi" -
        "Bac8" -
        "Bac9"    { set Clades [Paraph $Quoi ListOf JoinedClade] }
    }

    # Tested last on purpose: it overrides whatever was found above.
    if {$Quoi=="Cilio" || [OnTraite "CilioCarta" "Like"]} {
        if {[OnTraite "CilioCarta"]} {
            set Clades [BilanCilio ListOf JoinedClade]
        } else {
            set Clades [QuestionDeScience "CilioCarta" "ret BilanCilio ListOf JoinedClade"]
        }
    }

    if {$GetWhat=="GetList"} {
        return [split $Clades " "]
    }
    return $Clades
}
Index by: file name |
procedure name |
procedure call |
annotation
File generated 2022-04-05 at 12:55.