#rR gscope_blastomics.tcl

# NicoMapping Qui Quoi
#   Cached lookup in the semicolon-separated mapping file (4 columns read as
#   Id Acc IdT IdG; the row whose first field is ID_UNIPROT is skipped).
#   After loading, NicoMapping(<value>,<column>) gives the <column> value of the
#   row containing <value>, and NicoMapping(ListOf,<column>) lists a column.
#   Returns "" for an unknown key.
proc NicoMapping {{Qui ""} {Quoi ""}} {
    global NicoMapping
    set Fichier "~scalzitti/These/G3PO_Benchmark/Data/ID_ensembl_new_version.csv"
    if {[info exists NicoMapping($Qui,$Quoi)]} { return $NicoMapping($Qui,$Quoi) }
    if {[info exists NicoMapping("EstCharge")]} { return "" }
    set NicoMapping("EstCharge") 1
    foreach Ligne [LesLignesDuFichier $Fichier] {
        lassign [split $Ligne ";"] Id Acc IdT IdG
        if {$Id=="ID_UNIPROT"} { continue }
        foreach I [list Id Acc IdT IdG] {
            lappend NicoMapping(ListOf,$I) [set $I]
            foreach J [list Id Acc IdT IdG] {
                set NicoMapping([set $I],$J) [set $J]
            }
        }
    }
    return [NicoMapping $Qui $Quoi]
}

# CarExVerif ?Liste?
#   For each name in Liste (default: all PABs) reports through Espionne how many
#   of its ids have a non-empty CarEx cds. Returns the literal finCarExVerif.
proc CarExVerif {{Liste ""}} {
    if {$Liste==""} { set Liste [ListeDesPABs] }
    foreach Nom $Liste {
        set GN [ExtraitInfo $Nom "ValiGN:"]
        set NbNon 0
        set NbOui 0
        set Nb 0
        foreach Id [bbsc $Nom listof id] {
            set Cds [CarEx $Id cds]
            if {$Cds==""} { incr NbNon } else { incr NbOui }
            incr Nb
        }
        # A family without any id would otherwise divide by zero.
        set Pourcent [expr {$Nb > 0 ? (100*$NbOui)/$Nb : 0}]
        Espionne [format "%s %-12s %4d %s %5d %s %s %5d %3d%s de oui" $Nom $GN $NbNon non $NbOui oui sur $Nb $Pourcent "%"]
    }
    return finCarExVerif
}

# CarEx ?Qui? ?Quoi? ?Quid?
#   Cached access to exon maps (*.gar) and genomic sequences (*.fasta).
#   Qui is upper-cased, Quoi and Quid lower-cased. "CarEx" alone returns the help.
#   Once loaded, "<id> exon|intron <n>" returns the matching slice of the genomic
#   sequence and "<id> cds" (or "<id>" alone) the concatenated exons.
#   Returns "" when nothing is known.
proc CarEx {{Qui ""} {Quoi ""} {Quid ""}} {
    global CarEx
    set Qui  [string toupper $Qui]
    set Quoi [string tolower $Quoi]
    set Quid [string tolower $Quid]
    if {$Qui=="" && $Quoi=="" && $Quid==""} { set Qui "HELP" }
    if {[info exists CarEx($Qui,$Quoi,$Quid)]} { return $CarEx($Qui,$Quoi,$Quid) }
    if {[info exists CarEx("EstCharge")]} {
        if {$Quoi=="exon" || $Quoi=="intron"} {
            if {[info exists CarEx($Qui,${Quoi}_$Quid,limits)]} {
                set Limits $CarEx($Qui,${Quoi}_$Quid,limits)
                if {[info exists CarEx($Qui,sg,filename)]} {
                    set Seq [QueLaSequenceDuFichierTFA $CarEx($Qui,sg,filename)]
                    # The leading n makes the stored limits usable as 1-based positions.
                    return [string range n$Seq {*}$Limits]
                }
            }
        }
        if {$Quoi=="" || $Quoi=="cds"} {
            if {[info exists CarEx($Qui,exon,limits)]} {
                set Limits $CarEx($Qui,exon,limits)
                if {[info exists CarEx($Qui,sg,filename)]} {
                    set Seq [QueLaSequenceDuFichierTFA $CarEx($Qui,sg,filename)]
                    set Cds ""
                    foreach Limit $Limits {
                        lassign $Limit D F
                        append Cds [string range n$Seq $D $F]
                    }
                    return $Cds
                }
            }
        }
        return ""
    }
    set CarEx("EstCharge") 1

    set LeHelp {}
    lappend LeHelp "CarEx Help"
    lappend LeHelp "CarEx listof id"
    lappend LeHelp "CarEx T1JH94_STRMM ce filename ;# (carte exonique)"
    lappend LeHelp "CarEx T1JH94_STRMM sg filename ;# (séquence génomique)"
    lappend LeHelp "CarEx T1JH94_STRMM listof exon ;# 1 2 3 4"
    lappend LeHelp "CarEx T1JH94_STRMM listof intron ;# 1 2 3"
    lappend LeHelp "CarEx T1JH94_STRMM exon limits ;# {1 320} {380 466} {534 665} {776 934}"
    lappend LeHelp "CarEx T1JH94_STRMM exon_3 start ;# 534"
    lappend LeHelp "CarEx T1JH94_STRMM exon_3 end ;# 665"
    lappend LeHelp "CarEx T1JH94_STRMM exon_3 limits ;# 534 665"
    lappend LeHelp "CarEx T1JH94_STRMM exon 3 ;# GATAGTTTAATTCGAAAAAAAT... etc"
    lappend LeHelp "CarEx T1JH94_STRMM intron limits"
    lappend LeHelp "CarEx T1JH94_STRMM intron_2 start"
    lappend LeHelp "CarEx T1JH94_STRMM intron_2 end"
    lappend LeHelp "CarEx T1JH94_STRMM intron_2 limits"
    lappend LeHelp "CarEx T1JH94_STRMM intron 2"
    lappend LeHelp "CarEx T1JH94_STRMM cds ;# il y a meme le CDS"
    lappend LeHelp "CarEx T1JH94_STRMM ;# ... pour rien"
    set CarEx(HELP,,) [join $LeHelp "\n"]

    set SeqGenDir "/home/scalzitti/These/G3PO_Benchmark/Data/Fasta/0Kb"
    set CarExoDir "/home/scalzitti/These/G3PO_Benchmark/Data/Structures_map/References"
    # NOTE(review): the two H directories are not used below; kept as found.
    set SeqGenHDir "/home/scalzitti/These/Part2_IA/Data/Structure_map/R98_0919"
    set CarExoHDir "/home/scalzitti/These/Part2_IA/Data/Sequences_genomiques/R98_0919/References/0Kb"

    # The lists must exist even when a directory is empty, they are read below.
    set CarEx(LISTOF,sg,id) {}
    set CarEx(LISTOF,ce,id) {}
    set CarEx(LISTOF,nom,)  {}

    foreach FicSeqGen [lsort [glob -nocomplain "$SeqGenDir/*.fasta"]] {
        set Id [file rootname [file tail $FicSeqGen]]
        lappend CarEx(LISTOF,sg,id) $Id
        set CarEx($Id,sg,filename) $FicSeqGen
    }
    foreach FicCarExo [lsort [glob -nocomplain "$CarExoDir/*.gar"]] {
        set Id [file rootname [file tail $FicCarExo]]
        lappend CarEx(LISTOF,ce,id) $Id
        set CarEx($Id,ce,filename) $FicCarExo
        set Nom [bbsc $Id nom]
        lappend CarEx(LISTOF,nom,) $Nom
        lappend CarEx($Id,listof,nom) $Nom
        set NOM [string toupper $Nom]
        lappend CarEx($NOM,listof,id) $Id
        # Reverse keeps the value seen on the last matching line of this file;
        # it starts at 0 so that a file without matching line is not reversed.
        set Reverse 0
        foreach Ligne [LesLignesDuFichier $FicCarExo] {
            if { ! [regexp -nocase {^([a-z]+)_([0-9]+)[ \t]+([0-9]+)[ \t]+([0-9]+)} $Ligne Match EI I D F]} { continue }
            set Reverse [regexp {\-1} $Ligne]
            set ei [string tolower $EI]
            lappend CarEx($Id,listof,$ei) $I
            # Limits are always stored as low high.
            set W [Maxi $D $F]
            set D [Mini $D $F]
            set F $W
            set CarEx($Id,${ei}_$I,limits) "$D $F"
            set CarEx($Id,${ei}_$I,start) $D
            set CarEx($Id,${ei}_$I,end) $F
            lappend CarEx($Id,$ei,i) $I
            lappend CarEx($Id,$ei,limits) "$D $F"
        }
        if {$Reverse} {
            foreach ei [list exon intron] {
                if { ! [info exists CarEx($Id,$ei,i)]} { continue }
                set CarEx($Id,$ei,limits) [lreverse $CarEx($Id,$ei,limits)]
                set N [llength $CarEx($Id,$ei,i)]
                foreach What [list limits start end] {
                    set LesNew {}
                    foreach I $CarEx($Id,$ei,i) {
                        lappend LesNew $CarEx($Id,${ei}_[expr $N-$I+1],$What)
                    }
                    foreach I $CarEx($Id,$ei,i) {
                        # Write the array element itself, not a variable named by its value.
                        set CarEx($Id,${ei}_$I,$What) [lindex $LesNew $I-1]
                    }
                }
            }
        }
    }
    foreach Id $CarEx(LISTOF,ce,id) {
        if {[info exists CarEx($Id,listof,nom)]} {
            set CarEx($Id,listof,nom) [lsort -unique $CarEx($Id,listof,nom)]
        }
    }
    set CarEx(LISTOF,nom,) [lsort -unique $CarEx(LISTOF,nom,)]
    set CarEx(LISTOF,ce,) $CarEx(LISTOF,ce,id)
    set CarEx(LISTOF,id,) $CarEx(LISTOF,ce,id)
    return [CarEx $Qui $Quoi $Quid]
}

# CreateMacsimXmlNuc
#   Converts, for every PAB, the msfNuc msfNuc3 and msfNucProt3 files to macsims
#   format with clustalw, working from the genome directory. Returns Bravo.
proc CreateMacsimXmlNuc {} {
    NousAllonsAuBoulot [RepertoireDuGenome]
    file mkdir macsimXmlNuc
    file mkdir macsimXmlNuc3
    file mkdir macsimXmlNucProt3
    foreach Nom [ListeDesPABs] {
        Espionne $Nom
        foreach Type [list Nuc Nuc3 NucProt3] {
            set Msf [GscopeFile $Nom msf$Type]
            set Mac [GscopeFile $Nom macsimXml$Type]
            exec clustalw $Msf -convert -output=macsims -outfile=$Mac -outorder=input
        }
    }
    OnRevientDuBoulot
    return Bravo
}

# CreateNucAliFromProtAliBBSCPourTous
#   Runs CreateNucAliFromProtAliBBSC on every PAB. Returns Bravo.
proc CreateNucAliFromProtAliBBSCPourTous {} {
    foreach Nom [ListeDesPABs] {
        CreateNucAliFromProtAliBBSC $Nom
    }
    return Bravo
}

# CreateNucAliFromProtAliBBSC Nom
#   Builds the nucleotide alignment of Nom from its protein alignment: every
#   aligned residue is replaced by its codon, every gap by ---, a missing codon
#   by NNN. Writes nucalitfa and msfNuc, then calls MsfOnOneLine for msfNuc3 and
#   msfNucProt3. Returns the msfNuc file name, or "" when there is no usable msfProt.
proc CreateNucAliFromProtAliBBSC Nom {
    file mkdir "[RepertoireDuGenome]/nucalitfa"
    file mkdir "[RepertoireDuGenome]/msfNuc"
    file mkdir "[RepertoireDuGenome]/msfNuc3"
    file mkdir "[RepertoireDuGenome]/msfNucProt3"
    set FichierMsfProt [GscopeFile $Nom msfProt]
    if {[FileAbsent $FichierMsfProt]} { return "" }
    set MsfProt [ContenuDuFichier $FichierMsfProt]
    if {[regexp "^NoSequencesToAlign" $MsfProt]} { return "" }
    set FichierMsfTfa [TmpFile].tfa
    exec clustalw $FichierMsfProt -convert -output=FASTA -outfile=$FichierMsfTfa
    set LeMsfTfaNuc {}
    foreach Access [LaSequenceDuTFAs $FichierMsfTfa "LaListeDesAccess"] {
        set SeqProtTfa [QueLaSequenceDuTexteTFA [bbsc $Access prottfa content]]
        set SeqNucTfa [QueLaSequenceDuTexteTFA [bbsc $Access nuctfa content]]
        Espionne $Nom $Access [expr [string length $SeqProtTfa]*3] [string length $SeqNucTfa]
        if {$SeqNucTfa==""} { Espionne "$Access est vide" ; continue }
        # Codons of the previous access must not leak into this one.
        array unset CodonEn
        set I -1
        foreach P [split $SeqProtTfa ""] {a b c} [split $SeqNucTfa ""] {
            incr I
            set Codon "$a$b$c"
            # Only complete codons are kept; the others become NNN below.
            if {[string length $Codon]==3} { set CodonEn($I) $Codon }
            #Espionne $Access $I $P [AAduCodon $Codon] $Codon
        }
        set TfaProt [LaSequenceDuTFAs $FichierMsfTfa $Access]
        set SeqProt [QueLaSequenceDuTexteTFA $TfaProt]
        set LaSeqNucAlignee {}
        set I -1
        foreach A [split $SeqProt ""] {
            if {$A=="-" || $A=="."} { lappend LaSeqNucAlignee "---" ; continue }
            incr I
            #Espionne $I $A
            if { ! [info exists CodonEn($I)]} { set CodonEn($I) "NNN" }
            set Codon [string toupper $CodonEn($I)]
            lappend LaSeqNucAlignee $Codon
        }
        lappend LeMsfTfaNuc ">$Access homolog from $Nom nucaligment from protaligment"
        lappend LeMsfTfaNuc [join $LaSeqNucAlignee ""]
    }
    # The converted protein alignment was only a working copy.
    file delete -force $FichierMsfTfa
    set FichierNucAliTfa [SauveLesLignes $LeMsfTfaNuc dans [GscopeFile $Nom "nucalitfa"]]
    set FichierMsfNuc [GscopeFile $Nom "msfNuc"]
    exec clustalw $FichierNucAliTfa -convert -output=gscope -outfile=$FichierMsfNuc
    MsfOnOneLine $Nom msfNuc msfNuc3 "" ShowCodons
    MsfOnOneLine $Nom msfNuc msfNucProt3 "" ShowCodons
    return $FichierMsfNuc
}

# bbsc ?Qui? ?Quoi? ?Quid?
#   Cached description of the BBSC project: families (Nom), their sequences
#   (acid = Ac_Id_Org, ac, id), organisms and associated files.
#   "bbsc help" lists the supported queries. For a content type (see LesContent)
#   an empty Quid means content, i.e. the text of the corresponding file.
#   A query on an ac, id or acid falls back on the acid and then on the family.
#   Returns "" when nothing is known.
proc bbsc {{Qui ""} {Quoi ""} {Quid ""}} {
    global bbsc
    if {[string equal -nocase $Qui "Help"] || ($Qui=="" && $Quoi=="")} {
        set LeHelp {}
        lappend LeHelp "bbsc help"
        lappend LeHelp "bbsc listof nom"
        lappend LeHelp "bbsc listof acid"
        lappend LeHelp "bbsc listof ac"
        lappend LeHelp "bbsc listof id"
        lappend LeHelp "bbsc listof human acid"
        lappend LeHelp "bbsc listof human ac"
        lappend LeHelp "bbsc listof human id"
        lappend LeHelp "bbsc listof cds count"
        lappend LeHelp "bbsc listof human acid"
        lappend LeHelp "bbsc BBSC01 aliprot"
        lappend LeHelp "bbsc BBSC01 alinuc"
        lappend LeHelp "bbsc BBSC01 msf ... or msfProt msfNuc msfNuc3 msfNucProt3 macsimXml macsimRsf"
        lappend LeHelp "bbsc BBSC01 acid"
        lappend LeHelp "bbsc BBSC01 ac"
        lappend LeHelp "bbsc BBSC01 id"
        lappend LeHelp "bbsc BBSC01 cds count"
        lappend LeHelp "bbsc BBSC01 listof org"
        lappend LeHelp "bbsc BBSC01 listof copsaliprot"
        lappend LeHelp "bbsc BBSC01 listof acid"
        lappend LeHelp "bbsc BBSC01 listof ac"
        lappend LeHelp "bbsc BBSC01 listof id"
        lappend LeHelp "bbsc BBSC01 listof cds"
        lappend LeHelp "bbsc BBSC01 listof missingcds"
        lappend LeHelp "bbsc Q9DBI2_BBS10_MOUSE nom"
        lappend LeHelp "bbsc Q9DBI2_BBS10_MOUSE acid"
        lappend LeHelp "bbsc Q9DBI2_BBS10_MOUSE ac"
        lappend LeHelp "bbsc Q9DBI2_BBS10_MOUSE id"
        lappend LeHelp "bbsc Q9DBI2_BBS10_MOUSE nuctfa filename"
        lappend LeHelp "bbsc Q9DBI2_BBS10_MOUSE nuctfa content"
        lappend LeHelp "bbsc BBS10_MOUSE nom"
        lappend LeHelp "bbsc BBS10_MOUSE acid"
        lappend LeHelp "bbsc BBS10_MOUSE ac"
        lappend LeHelp "bbsc BBS10_MOUSE id"
        lappend LeHelp "bbsc BBS10_MOUSE nuctfa filename"
        lappend LeHelp "bbsc BBS10_MOUSE nuctfa content"
        lappend LeHelp "bbsc Q9DBI2 nom"
        lappend LeHelp "bbsc Q9DBI2 acid"
        lappend LeHelp "bbsc Q9DBI2 ac"
        lappend LeHelp "bbsc Q9DBI2 id"
        lappend LeHelp "bbsc Q9DBI2 nuctfa filename"
        lappend LeHelp "bbsc Q9DBI2 nuctfa content"
        lappend LeHelp "bbsc MOUSE listof acid"
        lappend LeHelp "bbsc listof content"
        return [join $LeHelp "\n"]
    }
    set LesContent [list aliprot alinuc nuctfa prottfa msf msfProt msfNuc msfNuc3 msfNucProt3 macsimXml macsimRsf]
    set bbsc(listof,content,) $LesContent
    if {[info exists bbsc($Qui,$Quoi,$Quid)]} { return $bbsc($Qui,$Quoi,$Quid) }
    if {[info exists bbsc("EstCharge")]} {
        if {$Quid=="" && [lsearch $LesContent $Quoi]>=0 } { set Quid "content" }
        # The file registered for Qui itself wins over any fallback, so that
        # "x nuctfa content" is the content of "x nuctfa filename".
        if {$Quid=="content" && [info exists bbsc($Qui,$Quoi,filename)]} {
            return [ContenuDuFichier $bbsc($Qui,$Quoi,filename)]
        }
        if {[info exists bbsc($Qui,acid,)] && $bbsc($Qui,acid,)!=$Qui} {
            set AcId $bbsc($Qui,acid,)
            if {[info exists bbsc($AcId,$Quoi,$Quid)]} { return [bbsc $AcId $Quoi $Quid] }
            # Same rule when Qui is an ac or an id: use the file of its acid first.
            if {$Quid=="content" && [info exists bbsc($AcId,$Quoi,filename)]} {
                return [ContenuDuFichier $bbsc($AcId,$Quoi,filename)]
            }
            if {$Quid=="content" && [info exists bbsc($AcId,nom,)] && [info exists bbsc($bbsc($AcId,nom,),$Quoi,filename)]} {
                return [ContenuDuFichier $bbsc($bbsc($AcId,nom,),$Quoi,filename)]
            }
        }
        if {[info exists bbsc($Qui,nom,)]} {
            if {[info exists bbsc($bbsc($Qui,nom,),$Quoi,$Quid)]} { return [bbsc $bbsc($Qui,nom,) $Quoi $Quid] }
            if {$Quid=="content" && [info exists bbsc($bbsc($Qui,nom,),$Quoi,filename)]} {
                return [bbsc $bbsc($Qui,nom,) $Quoi $Quid]
            }
        }
        return ""
    }
    set bbsc("EstCharge") 1
    set RG [RepertoireDuGenome]
    foreach Nom [ListeDesPABs] {
        lappend bbsc(listof,nom,) $Nom
        # Covers aliprot and alinuc too, both are members of LesContent.
        foreach Content $LesContent {
            set bbsc($Nom,$Content,filename) "$RG/$Content/$Nom"
        }
        set FicAliprot "$RG/aliprot/$Nom"
        set LesCopAli [LaSequenceDuTFAs $FicAliprot "LaListeDesAccess"]
        set bbsc($Nom,listof,copsaliprot) $LesCopAli
        set bbsc($Nom,acid,) [lindex $LesCopAli 0]
        set RepCopsNuctfa "$RG/copsnuctfa/$Nom"
        set RepCopsProttfa "$RG/copsprottfa/$Nom"
        set bbsc($Nom,listof,cds) {}
        set bbsc($Nom,listof,missingcds) {}
        set AcIdHuman ""
        foreach AcId $LesCopAli {
            lassign [split $AcId "_"] A I O
            set Ac $A
            set Id ${I}_$O
            lappend bbsc($Nom,listof,acid) $AcId
            lappend bbsc($Nom,listof,ac) $Ac
            lappend bbsc($Nom,listof,id) $Id
            # The first sequence of the alignment is the reference of the family.
            if {$AcIdHuman==""} {
                set AcIdHuman $AcId
                set IdHuman $Id
                set AcHuman $Ac
                set bbsc($Nom,acid,) $AcIdHuman
                set bbsc($Nom,ac,) $AcHuman
                set bbsc($Nom,id,) $IdHuman
                lappend bbsc(listof,human,acid) $AcIdHuman
                lappend bbsc(listof,human,ac) $AcHuman
                lappend bbsc(listof,human,id) $IdHuman
            }
            lappend bbsc($Nom,listof,org) $O
            lappend bbsc(listof,org,) $O
            lappend bbsc($O,listof,nom) $Nom
            lappend bbsc($O,listof,acid) $AcId
            lappend bbsc($AcId,nom,) $Nom
            lappend bbsc($AcId,acid,) $AcId
            lappend bbsc($AcId,ac,) $Ac
            lappend bbsc($AcId,id,) $Id
            lappend bbsc($Ac,acid,) $AcId
            lappend bbsc($Id,acid,) $AcId
            lappend bbsc(listof,acid,) $AcId
            lappend bbsc(listof,ac,) $Ac
            lappend bbsc(listof,id,) $Id
            set FicCopProttfa "$RepCopsProttfa/$AcId"
            set bbsc($AcId,prottfa,filename) $FicCopProttfa
            set FicCopNuctfa "$RepCopsNuctfa/$AcId"
            if {[file exists $FicCopNuctfa]} {
                lappend bbsc($Nom,listof,cds) $AcId
                set bbsc($AcId,nuctfa,filename) $FicCopNuctfa
            } else {
                lappend bbsc($Nom,listof,missingcds) $AcId
            }
        }
        set P [llength $bbsc($Nom,listof,cds)]
        set A [llength $bbsc($Nom,listof,missingcds)]
        set T [expr {$A+$P}]
        # An alignment without sequence would otherwise divide by zero.
        set PC [expr {$T > 0 ? (100*$A)/$T : 0}]
        set bbsc($Nom,cds,count) [format "%4d %s %3d %s %3d %s %3d%s %s" $T cops $P cds $A missingcds $PC % missing]
        lappend bbsc(listof,cds,count) "$Nom $bbsc($Nom,cds,count)"
    }
    return [bbsc $Qui $Quoi $Quid]
}

# CdsFromRefseq Refseq
#   Fetches the GenBank entry of Refseq, reads the location of its first CDS
#   feature and returns the corresponding part of the nucleotide sequence.
#   Returns "" (after FaireLire) when there is no CDS or when the location
#   contains anything else than digits, commas and dots.
proc CdsFromRefseq Refseq {
    JeMeSignale
    set GB [eFetchREST nucleotide $Refseq]
    DecortiqueGenBank OS OC SeqADN $GB
    if { ! [regexp {\n +CDS +([^\n]+)\n} $GB Match Limits]} {
        Espionne $GB
        FaireLire "pas de cds pour $Refseq !!!!!"
        return ""
    }
    set Cds ""
    regsub -all {[<>]} $Limits "" Limits
    if {[regexp {[^0-9\,\.]} $Limits]} {
        Espionne $GB
        Espionne $Limits
        FaireLire "bad limits $Limits !!!!!"
return ""
    }
    foreach Limit [split $Limits ","] {
        # A range is written D..F, so the middle element of the split is empty.
        lassign [split $Limit "."] D Bidon F
        append Cds [string range $SeqADN $D-1 $F-1]
    }
    return $Cds
}

# MrnaDesCopainsPourTous ?Liste?
#   For every family of Liste (default: all PABs) and every protein of
#   copsprottfa/<Nom> without a copsnuctfa file, collects candidate CDS (REFSEQ_NT
#   through IDMapping, dec2016 Ensembl through NicoMapping), keeps the one whose
#   translation has the fewest mismatches with the protein and saves it when the
#   mismatch rate is at most 9 percent.
#   Returns the list of "Nom Copain header" of the saved sequences.
proc MrnaDesCopainsPourTous {{Liste ""}} {
    set LaSortie {}
    set CopsNucTfaDir "[RepertoireDuGenome]/copsnuctfa"
    set CopsProtTfaDir "[RepertoireDuGenome]/copsprottfa"
    file mkdir "[RepertoireDuGenome]/copsnuctfa"
    if {$Liste==""} { set Liste [ListeDesPABs] }
    foreach Nom $Liste {
        set MesNucTfa "$CopsNucTfaDir/$Nom"
        file mkdir $MesNucTfa
        foreach FicProtTfa [lsort [glob -nocomplain "$CopsProtTfaDir/$Nom/*"]] {
            set Entete [EnteteDuFichierTFA $FicProtTfa]
            regsub "^>" $Entete "" Entete
            scan $Entete "%s" Copain
            lassign [split $Copain "_"] A B O
            set AC $A
            set ID ${B}_$O
            set FicNucTfa "$MesNucTfa/$Copain"
            if {[file exists $FicNucTfa]} { continue }
            Espionne ya pas $FicNucTfa
            # Candidates are per protein: do not reuse those of the previous one.
            set LesTfa {}
            array unset IsoformDuMismatch
            #set LesTfa [MrnasFrom $AC]
            set Refseqs [IDMapping ACC+ID REFSEQ_NT $AC]
            foreach LesRefseq $Refseqs {
                foreach Refseq $LesRefseq {
                    if {$Refseq!=""} {
                        set Cds [CdsFromRefseq $Refseq]
                        set Entete "$Refseq CDS from REFSEQ_NT $Refseq from ACC $AC from $Nom"
                        set Tfa [SequenceFormatTFA $Cds $Entete "nucbrut"]
                        lappend LesTfa $Tfa
                    }
                }
            }
            #rR We also use Nicolas' mapping, by AC first and then by ID
            set IdT [NicoMapping $AC IdT]
            if {$IdT==""} { set IdT [NicoMapping $ID IdT] }
            if {$IdT!=""} {
                set Cds [CdsFromDec2016 $IdT]
                set Entete "$IdT CDS from dec2016.ensembl $IdT from ACC $AC from $Nom"
                set Tfa [SequenceFormatTFA $Cds $Entete "nucbrut"]
                lappend LesTfa $Tfa
            }
            Espionne $AC $ID =$IdT=
            if {$LesTfa!={}} {
                set Pro [QueLaSequenceDuFichierTFA $FicProtTfa]
                set Max [string length $Pro]
                # Nothing can be scored against an empty protein.
                if {$Max==0} { continue }
                set LesMismatch {}
                foreach Tfa $LesTfa {
                    set Ent [EnteteDuTexteTFA $Tfa]
                    set Nuc [QueLaSequenceDuTexteTFA $Tfa]
                    set Nuc [string range $Nuc 0 [expr [string length $Pro]*3-1]]
                    set NucTraduit [SeqNucToSeqPro $Nuc]
                    set N 0
                    foreach P [split $Pro ""] T [split $NucTraduit ""] {
                        if {$P==$T} { incr N }
                    }
                    set Mismatch [expr $Max -$N]
                    set IsoformDuMismatch($Mismatch) $Tfa
                    lappend LesMismatch $Mismatch
                }
                # Numeric sort: a plain lsort would put 10 before 9.
                set LesMismatch [lsort -integer $LesMismatch]
                set Mismatch [lindex $LesMismatch 0]
                set Tfa $IsoformDuMismatch($Mismatch)
                set Seq [QueLaSequenceDuTexteTFA $Tfa]
                set EnteteIsoform [EnteteDuTexteTFA $Tfa]
                set Score [expr (100.*$Mismatch)/$Max]
                # NOTE(review): Entete is here the header of the last candidate built above.
                regsub "^>" $EnteteIsoform "$Entete " EnteteMrna
                append EnteteMrna " $Mismatch/$Max mismatch [format {%5.2f} $Score]%"
                if {$Score>9} { continue }
                Espionne Youpiiiiiiiii $EnteteMrna $FicNucTfa
                Sauve [SequenceFormatTFA $Seq $EnteteMrna nucbrut] dans $FicNucTfa
                lappend LaSortie "$Nom $Copain $EnteteIsoform"
            }
        }
    }
    return $LaSortie
}

# MrnaFromPourTous
#   For every PAB: when its nuctfa exists, checks it against the prottfa (length
#   without final stop codon, then translation) and reports differences through
#   FaireLire; otherwise fetches an mRNA and saves it as nuctfa.
#   Returns the list of PABs for which a nuctfa was created.
proc MrnaFromPourTous {} {
    set LaSortie {}
    file mkdir "[RepertoireDuGenome]/nuctfa"
    foreach Nom [ListeDesPABs] {
        set FicNucTfa [GscopeFile $Nom "nuctfa"]
        set FicProtTfa [GscopeFile $Nom "prottfa"]
        if {[file exists $FicNucTfa]} {
            set Nuc [QueLaSequenceDuTFA $FicNucTfa]
            regsub -nocase {(TAA|TAG|TGA)$} $Nuc "" Nuc
            set Pro [QueLaSequenceDuTFA $FicProtTfa]
            if {[string length $Nuc]!=[string length $Pro]*3} { FaireLire "$Nom [string length $Nuc]!=[string length $Pro]*3" }
            set Nuc [string range $Nuc 0 [expr [string length $Pro]*3-1]]
            if {[string length $Nuc]==[string length $Pro]*3} {
                set NucTraduit [SeqNucToSeqPro $Nuc]
                # String comparison: sequences must never be compared as numbers.
                if {$Pro ne $NucTraduit} {
                    set N 0
                    foreach P [split $Pro ""] T [split $NucTraduit ""] {
                        if {$P!=$T} { incr N }
                    }
                    FaireLire "$N ratés pour $Nom\n$Pro\n$NucTraduit"
                }
            }
            continue
        }
        set LesTfa [MrnasFrom $Nom]
        if {$LesTfa=={}} { Espionne "je ne rouve rien du tout pour $Nom" ; continue }
        set Seq [MrnaFrom $Nom]
        set Seq [string toupper $Seq]
        lappend LaSortie $Nom
        set AC [ExtraitInfo $Nom "AC:"]
        # The id comes from the ID: field (it was read from AC: by mistake).
        set ID [ExtraitInfo $Nom "ID:"]
        Sauve [SequenceFormatTFA $Seq "$Nom ${AC}_$ID" nucbrut] dans [GscopeFile $Nom "nuctfa"]
    }
    return $LaSortie
}

# CdsFromDec2016 ENST
#   Returns the CDS of transcript ENST as served by the dec2016 Ensembl REST site.
proc CdsFromDec2016 ENST {
    set Seq [ContenuDuFichier "https://dec2016.rest.ensembl.org/sequence/id/$ENST?type=cds&content-type=text/plain"]
    return $Seq
}

# MrnasFrom Qui ?FirstSeqOnly?
#rR Returns the TFAs of all the mRNAs of Qui (a PAB or an accession),
#rR or only the first raw sequence when the second argument is FirstSeqOnly.
#   The path is ACC to CCDS to Ensembl transcripts; returns "" when no CCDS is found.
proc MrnasFrom {Qui {FirstSeqOnly ""}} {
    set FirstSeqOnly [string equal -nocase "FirstSeqOnly" $FirstSeqOnly]
    set LesMrnas {}
    set Nom $Qui
    if {[EstUnPAB $Qui]} { set Qui [ExtraitInfo $Qui "AC:"] }
    # A stray "return $Seq" stood here: Seq is not yet defined at this point,
    # so every call failed before any lookup was done.
    set CCDSs [lindex [IDMapping ACC+ID CCDS $Qui] 0]
    if {$CCDSs=={}} {
        #rR there may be isoform extensions -1 -2 ...
        foreach I [NombresEntre 0 9] {
            set Ext "-$I"
            if {$I==0} { set Ext "" }
            set CCDSs [lindex [IDMapping ACC+ID CCDS $Qui$Ext] 0]
            if {$CCDSs!={}} { break }
        }
        if {$CCDSs=={}} { return "" }
    }
    foreach CCDS $CCDSs {
        set ENSTs [lindex [IDMapping CCDS ENSEMBL_TRS $CCDS] 0]
        if {$ENSTs=={}} { continue }
        foreach ENST $ENSTs {
            #Espionne $Nom $Qui $CCDS $ENST
            set Seq [ContenuDuFichier "https://rest.ensembl.org/sequence/id/$ENST?type=cds&content-type=text/plain"]
            if {$Seq!=""} {
                if {$FirstSeqOnly} { return $Seq }
                set Entete "$ENST mRNA from CCDS $CCDS from ACC $Qui from $Nom"
                lappend LesMrnas [SequenceFormatTFA $Seq $Entete "nucbrut"]
            }
        }
    }
    return $LesMrnas
}

#rR see below: BlastAli BlastIndel BlastStat and Blastomics

# DGB
#   Small manual test of DecortiqueGenBank on one protein entry. Returns DGB.
proc DGB {} {
    set Xrefs ""
    set Texte [lindex [eFetchREST protein "np_001297087"] 0]
    DecortiqueGenBank OS OC Seq $Texte 0 0 "" "" DbXrefs
    Espionne $DbXrefs
    Espionne $Seq
    return DGB
}

# AliIndel ?Qui? ?Quoi? ?FicAli?
#   Cached description of the alignment file FicAli (TFA). For each sequence,
#   keyed by its access without the part before the first underscore:
#   LE (aligned length), SS (aligned sequence), OS and DE (placeholders);
#   plus ListOf,AC and Query,LE Query,SQ Query,GQ for the first sequence.
#rR Warning: when FicAli differs from the loaded file, everything is reloaded.
proc AliIndel {{Qui ""} {Quoi ""} {FicAli ""}} {
    global AliIndel
    if {$FicAli!=""} {
        # The key must be spelled exactly as it is stored below (no quotes),
        # otherwise the previous file is never invalidated.
        if {[info exists AliIndel(FicAli,)] && $AliIndel(FicAli,)!=$FicAli} {
            if {[info exists AliIndel]} { unset AliIndel }
        }
        set AliIndel("FicAliMemo") $FicAli
    }
    if {[info exists AliIndel($Qui,$Quoi)]} { return $AliIndel($Qui,$Quoi) }
    if {[info exists AliIndel("EstCharge")]} { return "" }
    set AliIndel("EstCharge") 1
    set AliIndel(FicAli,) $AliIndel("FicAliMemo")
    set Query ""
    foreach Access [LaSequenceDuTFAs $AliIndel(FicAli,) "LaListeDesAccess"] {
        # Only the sequence is wanted here, not the TFA text with its header line.
        set SeqGap [QueLaSequenceDuTFAs $AliIndel(FicAli,) $Access]
        regsub {^[^_]+_} $Access "" AC
        regsub -all {[^A-Z]} $SeqGap "" Seq
        set LE [string length $SeqGap]
        lappend AliIndel(ListOf,AC) $AC
        set AliIndel($AC,LE) $LE
        set AliIndel($AC,SS) $SeqGap
        set AliIndel($AC,OS) "OS"; #[GeneQuid UniprotData $Access OS]
        set AliIndel($AC,DE) "DE"; #[GeneQuid UniprotData $Access DE]
        if {$Query==""} {
            # Remember that the query is known: it is the first sequence only.
            set Query $AC
            set AliIndel(Query,LE) $LE
set AliIndel(Query,SQ) $Seq
            set AliIndel(Query,GQ) $SeqGap
        }
    }
    return [AliIndel $Qui $Quoi]
}

# AliStatPourTous
#   Runs AliStat on every PAB and returns the list of its results.
proc AliStatPourTous {} {
    set LaSortie {}
    foreach Nom [ListeDesPABs] {
        set R [AliStat $Nom]
        lappend LaSortie $R
    }
    return $LaSortie
}

# AliStat ?FichierAli? ?CouOut? ?PilOut?
#rR Modelled on BlastStat (which built its alignment from a blast).
#   FichierAli is a PAB (its aliprot is used, outputs go to Couvert and Piliers)
#   or an alignment file (outputs are written next to it).
#   CouOut gets one line per sequence:
#     access;length;covered length;percent;PN;zones;OS;DE
#   PilOut gets one line "column;number of residues" per alignment column.
#   Returns the list of both file names.
proc AliStat {{FichierAli ""} {CouOut ""} {PilOut ""}} {
    if {[EstUnPAB $FichierAli]} {
        set Nom $FichierAli
        set FichierAli [GscopeFile $Nom "aliprot"]
        set RepCouvert "[RepertoireDuGenome]/Couvert"
        set RepPiliers "[RepertoireDuGenome]/Piliers"
        file mkdir $RepCouvert
        file mkdir $RepPiliers
        set AC [ExtraitInfo $Nom "AC:"]
        set ID [ExtraitInfo $Nom "ID:"]
        set ACID "${AC}_$ID"
        if {$CouOut==""} { set CouOut "$RepCouvert/${Nom}-$ACID.csv" }
        if {$PilOut==""} { set PilOut "$RepPiliers/${Nom}-$ACID.csv" }
    } else {
        if {$CouOut==""} { set CouOut $FichierAli.Couvert.csv }
        if {$PilOut==""} { set PilOut $FichierAli.Piliers.csv }
    }
    AliIndel "" "" $FichierAli
    set QL [AliIndel Query LE]
    foreach I [NombresEntre 1 $QL] { set TaillePilier($I) 0 }
    set PN 0.0
    set LesCouvert {}
    set LesPiliers {}
    foreach Access [AliIndel ListOf AC $FichierAli] {
        set LE [AliIndel $Access LE]
        set SS [AliIndel $Access SS]
        set DE [AliIndel $Access DE] ; regsub -all ";" $DE "," DE
        set OS [AliIndel $Access OS]
        set LesZones [regexp -all -indices -inline {[A-Z]+} $SS]
        set Couverture 0
        foreach Zone $LesZones {
            lassign $Zone D F
            set LZ [expr $F-$D+1]
            incr Couverture $LZ
            foreach I [NombresEntre $D $F] {
                # regexp indices start at 0 whereas the pillars are numbered from 1.
                incr TaillePilier([expr {$I+1}])
            }
        }
        set PC [expr {$LE > 0 ? round(100.*$Couverture/$LE) : 0}]
        # The covered length is the sum of all zones, like the percentage,
        # not the length of the last zone only.
        set LaLigne [list $Access $LE $Couverture $PC $PN $LesZones $OS $DE]
        lappend LesCouvert [join $LaLigne ";"]
    }
    foreach I [NombresEntre 1 $QL] {
        set T $TaillePilier($I)
        lappend LesPiliers "$I;$T"
    }
    if {$CouOut!=""} { SauveLesLignes $LesCouvert dans $CouOut }
    if {$PilOut!=""} { SauveLesLignes $LesPiliers dans $PilOut }
    return [list $CouOut $PilOut]
}

# InformeBBSCPourTous
#   For every PAB, reads the access of its prottfa header, asks GeneQuid for the
#   AC ID GN DE lines of that access and stores them in the info file of the PAB
#   (AC ID GN DE ValiGN ValiDE, plus BetterID when the ID differs). Returns bravo.
proc InformeBBSCPourTous {} {
    file mkdir "[RepertoireDuGenome]/infos"
    foreach Nom [ListeDesPABs] {
        #Espionne $Nom
        set F [GscopeFile $Nom prottfa]
        set TFA [ContenuDuFichier $F]
        scan $TFA "%s %s" N AccessId
        lassign [split $AccessId "_"] Access I H
        if {$H!="HUMAN"} { FaireLire "$AccessId pas bon" }
        set ClesEmbl [GeneQuid UniprotData $Access "+AC,+ID,+GN,+DE"]
        set AC ""
        set ID ""
        set GN ""
        set DE ""
        foreach Ligne [split $ClesEmbl "\n"] {
            if { ! [regexp {^([A-Z][A-Z]) (.*)$} $Ligne Match K V]} { FaireLire $Ligne; continue }
            # Only the first line of each key is kept.
            if {[set $K]!=""} { continue }
            set $K $V
        }
        regsub {;.*$} $AC "" AC
        regsub { .*$} $ID "" ID
        if {$Access!=$AC} { FaireLire "$Access\n$ClesEmbl" }
        set OriId ${I}_$H
        set BetterId ""
        if {$ID!="${I}_$H"} {
            #FaireLire "Better id $ID for $OriId"
            set BetterId $ID
        }
        Espionne $AC $Access
        Espionne $ID $OriId
        regexp {Name=([^; ]+)[; ]} $GN Match GN
        regexp {Full=([^;]+);} $DE Match DE
        Espionne $GN
        Espionne $DE
        InformeSansDemander $Nom "AC: $AC\nID: $OriId\nGN: $GN\nDE: $DE\nValiGN: $GN\nValiDE: $DE"
        if {$BetterId!=""} { InformeSansDemander $Nom "BetterID: $ID" }
    }
    return bravo
}

# CreateMsfAndMacsimFromTfaPourTous
#   Converts the aliprot of every PAB to a gscope msf with clustalw.
#   The macsim and RSF conversions are currently disabled.
proc CreateMsfAndMacsimFromTfaPourTous {} {
    file mkdir "[RepertoireDuGenome]/msf"
    file mkdir "[RepertoireDuGenome]/macsimXml"
    file mkdir "[RepertoireDuGenome]/macsimRsf"
    foreach Nom [ListeDesPABs] {
        Espionne $Nom
        set F [GscopeFile $Nom aliprot]
        set M [GscopeFile $Nom msf]
        set X [GscopeFile $Nom macsimXml]
        set R [GscopeFile $Nom macsimRsf]
        exec clustalw $F -convert -output=gscope -outfile=$M
        #exec clustalw $F -convert -output=macsim -outfile=$X
        #exec clustalw $F -convert -output=RSF -outfile=$R
    }
}

# CreateProjectBBSC
#   Builds the BBSC project in /genomics/link/BBSC from the alignments found in
#   DeKirsley: one PAB BBSCnn per alignment, with its aliprot, the prottfa of the
#   first sequence and one copsprottfa / copsprottfagap file per sequence.
#   Returns CreateProjectBBSC.
proc CreateProjectBBSC {} {
    set Rep "/genomics/link/BBSC"
    NousAllonsAuBoulot $Rep
    file mkdir aliprot
    file mkdir fiches
    file mkdir prottfa
    file mkdir copsprottfagap
    file mkdir copsprottfa
    set PF "BBSC"
    set I 0
    foreach FicAliprot [lsort [glob -nocomplain "DeKirsley/*.tfa"]] {
        incr I
        set Nom [format "%s%2.2d" $PF $I]
        set FicAli "aliprot/$Nom"
        file copy -force $FicAliprot $FicAli
set Human ""
        foreach Access [LaSequenceDuTFAs $FicAli "LaListeDesAccess"] {
            Espionne $Access
            set SeqGap [QueLaSequenceDuTFAs $FicAli $Access]
            regsub -all {[-\.~]} $SeqGap "" Seq
            # The first sequence of the alignment gives the prottfa of the PAB.
            if {$Human==""} {
                set Human $Access
                set TFA [SequenceFormatTFA $Seq "$Nom $Human" protbrut]
                Sauve $TFA dans "prottfa/$Nom"
            }
            file mkdir copsprottfa/$Nom
            file mkdir copsprottfagap/$Nom
            set TFA [SequenceFormatTFA $Seq "$Access homolog to $Human $Nom" protbrut]
            set TFAGap [SequenceFormatTFA $SeqGap "$Access homolog to $Human $Nom" protbrut]
            Sauve $TFA dans "copsprottfa/$Nom/$Access"
            Sauve $TFAGap dans "copsprottfagap/$Nom/$Access"
        }
        lappend LesNoms $Nom
    }
    CreeBornesDesPABsTroisGradins 1 $I $PF "" "" "2.2d" "."
    OnRevientDuBoulot
    return CreateProjectBBSC
}

#rR here is what is needed to process Claudine's blasts ... BlastAli BlastIndel BlastStat

# BlastAliStatPourTous
#   Runs BlastStat on every PAB and returns the list of its results.
proc BlastAliStatPourTous {} {
    set LaSortie {}
    foreach Nom [ListeDesPABs] {
        #lappend LaSortie [BlastAli $Nom]
        lappend LaSortie [BlastStat $Nom]
    }
    return $LaSortie
}

# OrganiseLesToposDeClaudine
#   Copies, for every PAB, <ID>_Arch/<ID>_Arch.blast from the genome directory to
#   its blastOri file and returns the list of the copies. The first part, which
#   filled the info files from the protembl, is disabled by if 0.
proc OrganiseLesToposDeClaudine {} {
    set LaSortie {}
    if {0} {
        foreach Nom [ListeDesPABs] {
            set Embl [ContenuDuFichier [GscopeFile $Nom protembl]]
            if { ! [regexp {ID ([^ ]+)( |$)} $Embl Match ID]} { FaireLire "pas de ID dans $Nom" }
            if { ! [regexp {\nAC ([^ ;]+)(;| |$)} $Embl Match AC]} { FaireLire "pas de AC dans $Nom" }
            if { ! [regexp {\nOX ([^ ;]+)(;| |$)} $Embl Match OX]} { FaireLire "pas de OX dans $Nom" }
            regsub -all {[^0-9]} $OX "" OX
            InformeSansDemander $Nom "=ID: $ID"
            InformeSansDemander $Nom "=Alias: $ID"
            InformeSansDemander $Nom "=AC: $AC"
            InformeSansDemander $Nom "=OX: $OX"
            lappend LaSortie "$Nom $ID $AC $OX"
        }
        return $LaSortie
    }
    set RepBlastOri "[RepertoireDuGenome]/blastOri"
    set RepTfaFromBOri "[RepertoireDuGenome]/tfaFromBOri"
    set RepMemo "[RepertoireDuGenome]/memo"
    file mkdir $RepBlastOri
    file mkdir $RepTfaFromBOri
    file mkdir $RepMemo
    NousAllonsAuBoulot [RepertoireDuGenome]
    foreach Nom [ListeDesPABs] {
        set ID [ExtraitInfo $Nom "ID:"]
        set RepTravail ${ID}_Arch
        set BlastOriOri "$RepTravail/${ID}_Arch.blast"
        Espionne $BlastOriOri
        set BlastOri [GscopeFile $Nom "blastOri"]
        file copy -force $BlastOriOri $BlastOri
        lappend LaSortie $BlastOri
    }
    OnRevientDuBoulot
    return $LaSortie
}

# LesOsDesAcDeClaudine_Ligne QY LeOs   (private helper of LesOsDesAcDeClaudine)
#   Returns the output line "QY organism" of one query. When no OS line was
#   collected, the organism is asked again for QY without its underscore suffix.
proc LesOsDesAcDeClaudine_Ligne {QY LeOs} {
    set OsCourant [join $LeOs " "]
    if {$OsCourant==""} {
        regsub {_.+$} $QY "" Mieux
        set OS [join [GeneQuid UniprotData $Mieux OS] " "]
        if {$OS!=""} { set OsCourant $OS } else { Espionne "Rien pour $QY" }
    }
    return "$QY $OsCourant"
}

# LesOsDesAcDeClaudine
#   Reads the accesses of LesAccessAvecCaca.txt (leading > and last underscore
#   suffix removed), asks GeneQuid for their QY and OS lines and writes one line
#   "query organism" per query in QyAc.txt. Returns the name of that file.
proc LesOsDesAcDeClaudine {} {
    set LesBonsAccess {}
    foreach Ligne [LesLignesDuFichier "/genomics/link/Topos/TOPO6B/LesAccessAvecCaca.txt"] {
        regsub {>} $Ligne "" Ligne
        regsub {_[^_]+$} $Ligne "" Ligne
        #Espionne $Ligne
        lappend LesBonsAccess $Ligne
    }
    set Retour [GeneQuid UniprotData [join $LesBonsAccess ","] "+QY,+OS"]
    set LeOsCourant {}
    set OldQY ""
    set LaSortie {}
    foreach Ligne [split $Retour "\n"] {
        if {[regexp {QY (.*)$} $Ligne Match Access]} {
            # A new query starts: write what was collected for the previous one
            # (the fallback lookup must use that previous query, not the new one).
            if {$OldQY!=""} {
                lappend LaSortie [LesOsDesAcDeClaudine_Ligne $OldQY $LeOsCourant]
                set LeOsCourant {}
            }
            set OldQY $Access
        }
        if {[regexp {OS (.*)$} $Ligne Match OS]} {
            set OS [string trim $OS]
            lappend LeOsCourant $OS
        }
    }
    # The last query has no following QY line to trigger its output.
    if {$OldQY!=""} {
        lappend LaSortie [LesOsDesAcDeClaudine_Ligne $OldQY $LeOsCourant]
    }
    set Fichier [SauveLesLignes $LaSortie dans "/genomics/link/Topos/TOPO6B/QyAc.txt"]
    return $Fichier
}

# BlastStat ?FichierBlast? ?CouOut? ?PilOut?
#   FichierBlast is a PAB (its blastOri is used, outputs go to Couvert and
#   Piliers) or a blast file (outputs are written next to it).
#   CouOut gets one line per hit sequence:
#     access;length;covered length;percent;PN;zones;OS;DE
#   PilOut gets one line "query position;number of hits" per query position.
#   Returns the list of both file names.
proc BlastStat {{FichierBlast ""} {CouOut ""} {PilOut ""}} {
    if {[EstUnPAB $FichierBlast]} {
        set Nom $FichierBlast
        set FichierBlast [GscopeFile $Nom "blastOri"]
        set RepCouvert "[RepertoireDuGenome]/Couvert"
        set RepPiliers "[RepertoireDuGenome]/Piliers"
        file mkdir $RepCouvert
        file mkdir $RepPiliers
        # NOTE(review): the other ExtraitInfo keys of this file end with a colon - confirm.
        set Alias [ExtraitInfo $Nom "Alias" ]
        if {$CouOut==""} { set CouOut "$RepCouvert/${Nom}-$Alias.csv" }
        if {$PilOut==""} { set PilOut "$RepPiliers/${Nom}-$Alias.csv" }
    } else {
        if {$CouOut==""} { set CouOut $FichierBlast.Couvert.csv }
        if {$PilOut==""} { set PilOut $FichierBlast.Piliers.csv }
    }
    FromBlastIndel "" "" $FichierBlast
    set QL [FromBlastIndel Query LE]
    foreach I [NombresEntre 1 $QL] { set TaillePilier($I) 0 }
    set LesCouvert {}
    set LesPiliers {}
    foreach Access [FromBlastIndel ListOf AC] {
        set LE [FromBlastIndel $Access LE]
        set SS [FromBlastIndel $Access SS]
        set PN [FromBlastIndel $Access PN]
        set DE [FromBlastIndel $Access DE] ; regsub -all ";" $DE "," DE
        set OS [FromBlastIndel $Access OS]
        set LH [FromBlastIndel $Access LH]
        set LesZones [regexp -all -indices -inline {[A-Z]+} $SS]
        set Couverture 0
        foreach Zone $LesZones {
            lassign $Zone D F
            set LZ [expr $F-$D+1]
            incr Couverture $LZ
        }
        # The coverage is the sum of all zones, not the length of the last one
        # (which is not even defined when there is no zone).
        set PC [expr {$LE > 0 ? round(100.*$Couverture/$LE) : 0}]
        set LaLigne [list $Access $LE $Couverture $PC $PN $LesZones $OS $DE]
        foreach H $LH {
            lassign $H AccessLu nCops nCopsDansCetteSequence OS Score Expect DebutQuery FinQuery DebutSbjct FinSbjct SeqQuery SeqConse SeqSbjct
            foreach I [NombresEntre $DebutQuery $FinQuery] { incr TaillePilier($I) }
        }
        lappend LesCouvert [join $LaLigne ";"]
    }
    set Max 0
    foreach I [NombresEntre 1 $QL] {
        set T $TaillePilier($I)
        set Max [Maxi $Max $T]
    }
    foreach I [NombresEntre 1 $QL] {
        set T $TaillePilier($I)
        # Bar of 1 to 201 stars, scaled on the highest pillar (1 when there is no hit).
        set L [expr {$Max > 0 ? (200*$T)/$Max + 1 : 1}]
        set Ligne [string repeat "*" $L]
        Espionne [format %4d $I] $Ligne
        lappend LesPiliers "$I;$T"
    }
    if {$CouOut!=""} { SauveLesLignes $LesCouvert dans $CouOut }
    if {$PilOut!=""} { SauveLesLignes $LesPiliers dans $PilOut }
    return [list $CouOut $PilOut]
}

# TBA
#   Runs BlastAli on blast_Ark_Topo6B.txt with a fixed list of accesses.
proc TBA {} {
    set LesAC [list WP_011223034.1 RLE46311.1 WP_048156073.1 WP_048120871.1 WP_116481489.1 WP_015054741.1 PKL53186.1 KXS40906.1
WP_011307748.1 WP_048117745.1 ABK14390.1 WP_048124872.1 WP_011021597.1 WP_048039201.1 WP_048038741.1 WP_048171932.1 WP_048182357.1 WP_048107223.1 WP_048195132.1 WP_048205782.1 WP_095645014.1 WP_048137421.1]
    return [BlastAli blast_Ark_Topo6B.txt $LesAC]
}

# BlastAliComprime Fichier
#   Rewrites the alignment Fichier (TFA) with fewer columns: when a column holds
#   a single residue and the same sequence has a gap further right, that
#   sequence is shifted by one position and the column is dropped.
#   Saves the result in <Fichier without .tfa>_new.tfa and returns that name.
proc BlastAliComprime {Fichier} {
    set I -1
    set LesAccess [LaSequenceDuTFAs $Fichier "LaListeDesAccess"]
    foreach Access $LesAccess {
        incr I
        lappend LesI $I
        set Tfa [LaSequenceDuTFAs $Fichier $Access]
        set Seq [QueLaSequenceDuTexteTFA $Tfa]
        set LesC [split $Seq ""]
        set J -1
        foreach C $LesC {
            incr J
            set T($I,$J) $C
            if {$C ne "-"} { lappend LesIduNonVide($J) $I; incr TaillePilier($J) }
        }
    }
    set DernierJ $J
    set LesJ [NombresEntre 0 $DernierJ]
    foreach J $LesJ {
        # Only the columns with exactly one residue are candidates.
        if { ! [info exists TaillePilier($J)] || $TaillePilier($J)>1} { continue }
        set I [lindex $LesIduNonVide($J) 0]
        set K $J
        set KVide ""
        set Comprime 0
        while {$K<$DernierJ} {
            incr K
            if {$T($I,$K) eq "-"} {
                set KVide $K
                # Shift the residues J..K-1 one step to the right, into the gap;
                # the target index is read before K is decremented.
                while {$K > $J} {
                    set T($I,$K) $T($I,[incr K -1])
                    Espionne T($I,$K) $T($I,$K)
                }
                set T($I,$J) "-"
                set Comprime 1
                break
            }
        }
        if {$Comprime} {
            incr TaillePilier($KVide)
            set JComprime($J) $I
        }
    }
    set LeTFAs {}
    set I -1
    foreach Access $LesAccess {
        incr I
        set LaSeq {}
        foreach J [NombresEntre 0 $DernierJ] {
            if {[info exists JComprime($J)]} { continue }
            lappend LaSeq $T($I,$J)
        }
        set Seq [join $LaSeq ""]
        set SeqOri [QueLaSequenceDuTexteTFA [LaSequenceDuTFAs $Fichier $Access]]
        if {0 && $SeqOri ne $Seq} {
            Espionne
            Espionne $Access
            Espionne [QueLaSequenceDuTexteTFA [LaSequenceDuTFAs $Fichier $Access]]
            Espionne $Seq
        }
        set TFA [SequenceFormatTFA $Seq $Access "protbrut"]
        #Espionne $TFA
        lappend LeTFAs $TFA
    }
    set New $Fichier
    # The dot is escaped so that only a real .tfa extension is removed.
    regsub {\.tfa$} $New "" New
    append New "_new.tfa"
    SauveLesLignes $LeTFAs dans $New
    return $New
}

# BlastAli Fichier ?Nieme? ?SeuilExpect? ?MaxListe?
#   Builds a multiple alignment (TFA) from the pairwise alignments of a blast
#   output: the query is stretched with the largest insertion seen after each
#   position and every subject is laid on that stretched query.
#   Fichier is a PAB (blastOri, memo and tfaFromBOri files are used) or a blast
#   file (memo and .tfa are written next to it). The BlastIndel array is loaded
#   from the memo file when it exists, else built by BlastIndel and memorised.
#   When Nieme contains ShowStats or DisplayStats, only the insertion statistics
#   are shown. Returns what SauveLesLignes returns for the TFA file.
proc BlastAli {Fichier {Nieme ""} {SeuilExpect ""} {MaxListe ""}} {
    global BlastIndel
    set DisplayStats 0
    set ShowStats 0
    if {[regexp -nocase "ShowStats|DisplayStats" $Nieme]} {
        if {[regexp -nocase "DisplayStats" $Nieme]} { set DisplayStats 1 }
        set ShowStats 1
        set Nieme ""
    }
    if {[EstUnPAB $Fichier]} {
        set Nom $Fichier
        set Fichier [GscopeFile $Nom "blastOri"]
        set Memo [GscopeFile $Nom "memo"]
        set FichierTfa [GscopeFile $Nom "tfaFromBOri"]
    } else {
        # Nom is needed by DisplayStats below, also for a plain file.
        set Nom [file rootname [file tail $Fichier]]
        set Memo "${Fichier}_[join $Nieme _].memo"
        set FichierTfa $Fichier
        # The dot is escaped so that only a real .txt extension is removed.
        regsub {\.txt$} $FichierTfa "" FichierTfa
        append FichierTfa .tfa
    }
    if {[file exists $Memo]} {
        # Start from an empty array, as BlastIndel itself does, so that nothing
        # from a previously loaded blast survives.
        array unset BlastIndel
        array set BlastIndel [ContenuDuFichier $Memo]
    } else {
        BlastIndel $Fichier $Nieme $SeuilExpect $MaxListe
        Sauve [array get BlastIndel] dans $Memo
    }
    set GapApres(trululu) ""
    foreach K [array names BlastIndel "wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww"] {
        Espionne BlastIndel($K)=$BlastIndel($K)=
    }
    #exit
    # Debug: access to be traced in detail.
    set AVoir "WP_049995565.1==================="
    foreach AC $BlastIndel(ListOf,AC) {
        set MontreAC 0
        if {$AC==$AVoir} { set MontreAC 1 }
        #Espionne
        #Espionne $BlastIndel($AC,AC) $BlastIndel($AC,OS) $BlastIndel($AC,LE) $BlastIndel($AC,DE)
        set IemeH 0
        foreach H $BlastIndel($AC,LH) {
            incr IemeH
            lassign $H Access nCops nCopsDansCetteSequence OS Score Expect DebutQuery FinQuery DebutSbjct FinSbjct SeqQuery SeqConse SeqSbjct
            # IQ and IS count the residues already consumed in query and subject.
            set IQ [expr $DebutQuery-1]
            set IS [expr $DebutSbjct-1]
            set J [expr $IQ-1]
            if {$MontreAC} {
                Espionne AC $AC
                EspionneL $H
                Espionne IQ=$IQ IS=$IS
                Espionne SeqQuery
                Espionne =$SeqQuery=
                Espionne SeqSbjct
                Espionne =$SeqSbjct=
            }
            foreach AQ [split $SeqQuery ""] AS [split $SeqSbjct ""] {
                incr J
                if {$AQ=="-" && $AS=="-"} {
                    set GapAt(Query,$J) 1
                    set GapAt($Access,$J) 1
                    #incr GapApres(Q,$AC,$IQ)
                    incr GapApres(S,$AC,$IS)
                    continue
                }
                if {$AQ=="-" && $AS!="-"} {
                    # Insertion in the subject after query position IQ.
                    set GapAt(Query,$J) 1
                    incr IS
                    incr GapApres(Q,$AC,$IQ)
                    lappend SousQueryEtendu($IQ) "$AC $IS"
                    #Espionne SousQueryEtendu($IQ) "$AC $IS $GapApres(Q,$AC,$IQ)"
                    continue
                }
                if {$AQ!="-" && $AS=="-"} {
                    set GapAt($Access,$J) 1
                    incr IQ
                    incr GapApres(S,$AC,$IS)
                    continue
                }
                if {$AQ!="-" && $AS!="-"} {
                    incr IQ
                    incr IS
                    lappend SousQuery($IQ) "$AC $IS"
                    continue
                }
            }
        }
    }
    # Debug: show the gaps recorded for the traced access.
    foreach K [array names GapAt "*$AVoir*"] {
        Espionne GapAt($K) $GapAt($K)
    }
    #exit
    set LengthWithGap $BlastIndel(Query,LE)
    set SeqGap ""
    set Seq $BlastIndel(Query,SQ)
    set LesStats {}
    foreach IQ [NombresEntre 0 [expr $BlastIndel(Query,LE)-1]] {
        set MaxGap 0
        foreach K [array names GapApres -glob "Q,*,$IQ"] {
            set MaxGap [Maxi $MaxGap $GapApres($K)]
            #Espionne $IQ GapApres($K) $GapApres($K) $MaxGap
        }
        if {$ShowStats} {
            set Message "[format {%4d %3d} $IQ $MaxGap] [string repeat * $MaxGap]"
            lappend LesStats $Message
            Espionne $Message
        }
        incr LengthWithGap $MaxGap
        # NOTE(review): with IQ running from 0 to LE-1 the last query residue is
        # never appended - confirm whether this is intended.
        append SeqGap [string index $Seq $IQ-1]
        append SeqGap [string repeat "-" $MaxGap]
    }
    if {$DisplayStats} { return [AfficheListe $LesStats "" "Couvert/$Nom"] }
    if {$ShowStats} { return "FinShowStats" }
    #parray SousQuery
    #parray GapApres
    #Espionne $LengthWithGap
    set IQ 0
    set J 0
    foreach Q [split $SeqGap ""] {
        set Ali(Query,$J) $Q
        if {$Q!="-"} { incr IQ }
        if {[info exists SousQuery($IQ)]} {
            foreach ACPos $SousQuery($IQ) {
                scan $ACPos "%s %s" AC Pos
                set Ali($AC,$J) [string index $BlastIndel($AC,SS) $Pos-1]
            }
            unset SousQuery($IQ)
        }
        if {[info exists SousQueryEtendu($IQ)]} {
            if {[info exists iDe]} { unset iDe }
            foreach ACPos $SousQueryEtendu($IQ) {
                scan $ACPos "%s %s" AC Pos
                # iDe counts, per access, the inserted residues placed after column J.
                incr iDe($AC)
                set Ali($AC,[expr $J+$iDe($AC)]) [string index $BlastIndel($AC,SS) $Pos-1]
            }
            unset SousQueryEtendu($IQ)
        }
        incr J
    }
    set LesLignes {}
    foreach AC [concat [list Query] $BlastIndel(ListOf,AC)] {
        set MontreAC 0
        set Previous ""
        set Ligne ""
        set IS 0
        foreach J [NombresEntre 0 [expr $LengthWithGap-1]] {
            if {$MontreAC} { Espionne $AC $J info exists Ali($AC,$J) [info exists Ali($AC,$J)] [info exists GapAt($AC,$J)] }
            if {[info exists Ali($AC,$J)]} {
                append Ligne $Ali($AC,$J)
                set Previous $Ali($AC,$J)
                incr IS
            } elseif {$Previous=="-"} {
                #Espionne 2 $J -
                append Ligne "-"
                set Previous "-"
            } elseif {[info exists GapAt($AC,$J)]} {
                #Espionne 3 $J -
                append Ligne "-"
                set Previous "-"
            } else {
                #Espionne 4 $J .
                append Ligne .
                set Previous .
            }
        }
        #Espionne [string range $Ligne 0 250]
        lappend LesLignes ">$AC"
        regsub -all {\.} $Ligne "-" Ligne
        regsub -all {\?} $Ligne "X" Ligne
        lappend LesLignes $Ligne
    }
    return [SauveLesLignes $LesLignes dans $FichierTfa]
}

# FromBlastIndel ?Qui? ?Quoi? ?args?
#   Reads BlastIndel(Qui,Quoi) from the global array filled by BlastIndel.
#rR Warning: when args is not empty everything is reloaded with BlastIndel args.
#   Returns "" for an unknown key.
proc FromBlastIndel {{Qui ""} {Quoi ""} args} {
    global BlastIndel
    if {$args!={}} {
        if {[info exists BlastIndel]} { unset BlastIndel }
        BlastIndel {*}$args
        return [FromBlastIndel $Qui $Quoi]
    }
    if {[info exists BlastIndel($Qui,$Quoi)]} { return $BlastIndel($Qui,$Quoi) }
    if {[info exists BlastIndel("EstCharge")]} { return "" }
    set BlastIndel("EstCharge") 1
    return [FromBlastIndel $Qui $Quoi]
}

proc BlastIndel {Fichier {Nieme ""} {SeuilExpect ""} {MaxListe ""} {Qui ""} {Quoi ""}} {
    global BlastIndel
    if {[info exists BlastIndel]} { unset BlastIndel } ; #rR 2019/01/21
    set NiemeEstUnNombre [regexp {^[0-9]+$} $Nieme]
    set NiemeEstUnAccess [regexp -nocase {[a-z]} $Nieme]
    if {$NiemeEstUnNombre} { set MaxListe $Nieme }
    set NbHits [DecortiqueBlast $Fichier $SeuilExpect $MaxListe Query lBanqueId lAccess lDE lProfil lPN lPartieSegAli]
    set Tout [ContenuDuFichier $Fichier]
    set QueryLength [IntegerApres "Length=" dans $Tout]
    if {$QueryLength=="" && ! [regexp {\(([0-9]+) letters\)} $Tout Match QueryLength]} { FaireLire "I cannot find the QueryLength" }
    Espionne $QueryLength
    set SQ [string repeat "?"
$QueryLength] set N 0 foreach BId $lBanqueId PN $lPN SegAli $lPartieSegAli { incr N #Espionne BId $BId $N/$NbHits $Nieme $NiemeEstUnNombre $NiemeEstUnAccess if {$NiemeEstUnNombre && $N!=$Nieme} { continue } if {$NiemeEstUnAccess && [lsearch $Nieme $BId]<0} { continue } #EspionneL $SegAli set Access "" set Definition "" set OS "Unknown unknown" set Entete [join $SegAli " "] regsub {\n Score = .+$} $Entete "" Entete regsub -all { +} $Entete " " Entete set Entete [string trim $Entete] if { [regexp {>([^ ]+) ([^\[]+)\[([^\]]+)\]} $Entete Match Access Definition OS]} { } else { if { [regexp {>([^ ]+) RecName: Full=(.+) AltName:} $Entete Match Access Definition]} { } else { if {[regexp {>([^ ]+) (.+)$} [lindex $SegAli 0] Match Access Definition]} { } else { Espionne $Entete; FaireLire "Ca merde pour $BId"; continue } } } set Access [string trim $Access] set Definition [string trim $Definition] set OS [string trim $OS] set Length -1 set SS "" lappend SegAli "FinSegAli" "FinSegAli" "FinSegAli" lassign [LectureSegAli $Access $SQ $OS $SegAli] SQ SS Length ListeDesHomologues if {[info exists DejaVuQueryEn]} { unset DejaVuQueryEn } if {[info exists DejaVuSbjctEn]} { unset DejaVuSbjctEn } foreach H $ListeDesHomologues { lassign $H Access nCops nCopsDansCetteSequence OS Score Expect DebutQuery FinQuery DebutSbjct FinSbjct SeqQuery SeqConse SeqSbjct set MontreAC [string equal $Access "WP_049995561.1"] set SeqQueryGarde "" set SeqConseGarde "" set SeqSbjctGarde "" set iQueryCourant [expr $DebutQuery-1] set iSbjctCourant [expr $DebutSbjct-1] if {[info exist DebutQueryGarde]} { unset DebutQueryGarde } if {[info exist DebutSbjctGarde]} { unset DebutSbjctGarde } set OnGarde 0 if {[info exists DejaVuQueryLocalEn]} { unset DejaVuQueryLocalEn } if {[info exists DejaVuSbjctLocalEn]} { unset DejaVuSbjctLocalEn } foreach RQ [split $SeqQuery ""] RC [split $SeqConse ""] RS [split $SeqSbjct ""] { if {$RQ!="-"} { incr iQueryCourant if { ! 
[info exists DebutQueryGarde]} { set DebutQueryGarde $iQueryCourant set OnGarde 1 } set FinQueryGarde $iQueryCourant } if {$RS!="-"} { incr iSbjctCourant if { ! [info exists DebutSbjctGarde]} { set DebutSbjctGarde $iSbjctCourant set OnGarde 1 } set FinSbjctGarde $iSbjctCourant } #Espionne =====================================================================$nCops=$RQ=$RS=DejaVuQueryEn($iQueryCourant) DejaVuSbjctEn($iSbjctCourant) [info exists DejaVuQueryEn($iQueryCourant)] || [info exists DejaVuSbjctEn($iSbjctCourant)] if {[info exists DejaVuQueryEn($iQueryCourant)] || [info exists DejaVuSbjctEn($iSbjctCourant)]} { continue } #Espionne =$RQ=$RS=$OnGarde= if {$OnGarde} { append SeqQueryGarde $RQ append SeqConseGarde $RC append SeqSbjctGarde $RS } set DejaVuQueryLocalEn($iQueryCourant) 1 set DejaVuSbjctLOcalEn($iSbjctCourant) 1 set FinQueryGarde $iQueryCourant set FinSbjctGarde $iSbjctCourant } set HomologieGarde [list $Access $nCops $nCopsDansCetteSequence $OS $Score $Expect $DebutQueryGarde $FinQueryGarde $DebutSbjctGarde $FinSbjctGarde $SeqQueryGarde $SeqConseGarde $SeqSbjctGarde] lappend ListeDesHomologuesGarde $HomologieGarde lappend BlastIndel($Access,LH) $HomologieGarde foreach I [array names DejaVuQueryLocalEn] { set DejaVuQueryEn($I) 1 } foreach I [array names DejaVuSbjctLocalEn] { set DejaVuSbjctEn($I) 1 } } set LH [lsort -integer -index 6 $ListeDesHomologuesGarde] lappend BlastIndel(ListOf,AC) $Access set BlastIndel($Access,AC) $Access set BlastIndel($Access,PN) $PN set BlastIndel($Access,DE) $Definition set BlastIndel($Access,OS) $OS set BlastIndel($Access,SS) $SS set BlastIndel($Access,LE) $Length } set BlastIndel(Query,LE) $QueryLength set BlastIndel(Query,SQ) $SQ if {$Qui!=""} { return $BlastIndel($Qui,$Quoi) } return "BlastIndel" } if {[OnTraite CilioCarta]} { source /home/ripp/gscope/gscope_blome.tcl } proc BlastomicsDir {} { return "/home/ripp/www/blastomics" } proc BlastomicsDbDir {} { return "[BlastomicsDir]/db" } proc TestBlastomicsSql {Projet 
Phylum} { set LeSql [list] lappend LeSql "SELECT COUNT(ln_organism_taxobla.pk_taxobla), specie, taxid, ln_organism_taxobla.pk_organism, clade.nom " lappend LeSql "FROM ln_organism_taxobla join taxobla ON taxobla.pk_taxobla =ln_organism_taxobla.pk_taxobla " lappend LeSql "JOIN organism ON organism.pk_organism= ln_organism_taxobla.pk_organism " lappend LeSql "JOIN ln_clade_organism ON ln_clade_organism.pk_organism = organism.pk_organism " lappend LeSql "JOIN clade ON clade.pk_clade=ln_clade_organism.pk_clade " lappend LeSql "GROUP BY organism.pk_organism ORDER BY clade.pk_clade" set Sql [join $LeSql " "] set R [SqlExecForDatabase "[BlastomicsDbDir]/${Projet}_${Phylum}.db" $Sql GetList] EspionneL $R exit } proc BlastomicsCladesClaudineNeSertPlus {} { #rR attention il y a ausi des choses qui ressemblent dans gscope_oi ... set LesPhylum [SqlExecForDatabase "[BlastomicsDbDir]/AHalma_Archaea.db" "select nom from clade order by nom" "GetList"] set LesPhylum [OrderedClades $LesPhylum] foreach PhName $LesPhylum { set PhTaxId [Tax $PhName TaxId] set PhClass [TaxClass $PhTaxId Name] Espionne [format "%10d %-30s %s" $PhTaxId $PhName $PhClass] } OiDomain Archaea foreach OX [OiCode ListOf OX] { set OS [OiCode $OX OS] Espionne "$OX $OS" } } proc CanalSqlTaxobla {{Bdd ""}} { if {$Bdd!="" && [FileAbsent $Bdd]} { set Bdd "[BlastomicsDbDir]/$Bdd" } Espionne $Bdd set Queue [file tail $Bdd] if { "[CanalSql "GetDbname"].db" != $Queue } { Espionne [CanalSql "GetDbname"] Espionne $Queue CanalSqlDisconnect CanalSql [ConnInfoForDatabase $Bdd] } return [CanalSql] } proc BlastomicsFilterTaxobla {{Bdd ""} {ListOfPkOrg ""} {ListOfNot ""} {ListOfCladeCounts ""}} { #rR Attention cette procédure se rappelle elle-même pour les Not CanalSqlTaxobla $Bdd set UseUnion [string equal -nocase $ListOfCladeCounts "UseUnion"] LogWscope $ListOfPkOrg LogWscope $ListOfNot if {$ListOfPkOrg=="-"} { set ListOfPkOrg "" } if {$ListOfNot =="-"} { set ListOfNot "" } if {[regexp {[^0-9]} $ListOfPkOrg]} { regsub -all 
{[^0-9]+} $ListOfPkOrg "," ListOfPkOrg set ListOfPkOrg [split [string trim $ListOfPkOrg ","] ","] } if {$ListOfPkOrg==""} { set LesPkTot [SqlExec "select pk_taxobla from taxobla" "GetList"] } else { set PremiereFois 1 foreach PkO $ListOfPkOrg { set Sql "select pk_taxobla from ln_organism_taxobla where pk_organism==$PkO" set LesPkT [SqlExec $Sql "GetList"] if {$PremiereFois} { set LesPkTot $LesPkT set PremiereFois 0 } else { if {$UseUnion} { set LesPkTot [ListsUnion $LesPkTot $LesPkT] } else { set LesPkTot [ListsIntersection $LesPkTot $LesPkT] } } } } if {$ListOfNot=="GetListOfPk"} { return $LesPkTot } if {$ListOfNot!=""} { set LesNot [BlastomicsFilterTaxobla $Bdd $ListOfNot "GetListOfPk" "UseUnion"] set LesPkTot [ListsComplement $LesPkTot $LesNot] } #####################if {$ListOfCladeCounts==""} { return $LesPkTot } #rR on suppose qu'il n 'y a pas de Absence checkée si un nombre de species est demandé. #rR donc on peut filter sur le nombre voulu. set LesNbHitsDesClades {} foreach CC [split $ListOfCladeCounts ","] { LogWscope $CC set CountText "" lassign [split $CC "/"] Clade CountText LogWscope "$Clade $CountText" set LesBornes [split $CountText "-"] set D [lindex $LesBornes 0] set F [lindex $LesBornes end] set LesBonsNb {} set LeSql {} lappend LeSql "select ln_branche_taxobla.pk_taxobla from ln_branche_taxobla, branche, sapin, clade " lappend LeSql " where " lappend LeSql " '$Clade' = clade.nom " lappend LeSql " and clade.pk_clade = sapin.pk_clade " lappend LeSql " and sapin.pk_sapin = branche.pk_sapin " lappend LeSql " and branche.pk_branche = ln_branche_taxobla.pk_branche " lappend LeSql " and branche.id_branche >= $D and branche.id_branche <= $F" set Sql [join $LeSql " "] regsub -all { +} $Sql " " Sql LogWscope $Sql set LesBonsNb [SqlExec $Sql "GetList"] set Nb [llength $LesBonsNb] LogWscope "$Nb trouves" lappend LesNbHitsDesClades "$Clade=$D-$F" set LesPkTot [ListsIntersection $LesPkTot $LesBonsNb] } #foreach CC [split $ListOfCladeCounts ","] { # LogWscope 
$CC # scan $CC "%d/%s" PkClade CountText # set LesBornes [split $CountText "-"] # set D [lindex $LesBornes 0] # set F [lindex $LesBornes end] # set LesBonsNb {} # set LeSql {} # lappend LeSql "select ln_branche_taxobla.pk_taxobla from ln_branche_taxobla, branche, sapin, clade " # lappend LeSql " where " # lappend LeSql " $PkClade = sapin.pk_clade " # lappend LeSql " and sapin.pk_sapin = branche.pk_sapin " # lappend LeSql " and branche.pk_branche = ln_branche_taxobla.pk_branche " # lappend LeSql " and branche.id_branche >= $D and branche.id_branche <= $F" # set Sql [join $LeSql " "] # regsub -all { +} $Sql " " Sql # Espionne $Sql # set LesBonsNb [SqlExec $Sql "GetList"] # set Nb [llength $LesBonsNb] # lappend LesNbHitsDesClades "$PkClade=$D-$F" # set LesPkTot [ListsIntersection $LesPkTot $LesBonsNb] # } set LesPk {} set LesGscopeId {} set LesGeneId {} set Sql "select pk_taxobla, gscopeid, gene_id from taxobla where pk_taxobla in ([join $LesPkTot ,])" LogWscope $Sql foreach {Pk GscopeId GeneId} [SqlExec $Sql "GetList"] { lappend LesPk $Pk lappend LesGscopeId $GscopeId if {$GeneId!=""} { lappend LesGeneId $GeneId } } set Pks [join $LesPk " "] set Noms [join $LesGscopeId " "] set GeneIds [join $LesGeneId " "] set NbHitsDesClades [join $LesNbHitsDesClades " "] set Resultat "$Pks/$Noms/$GeneIds/$NbHitsDesClades" return $Resultat } proc CladeContentWithinOi2017 {Clade {KindOfClades ""}} { JeMeSignale #rR on ne garde que ceux qui existent aussi dans Oi2017 ATTENTION AU DOMAINE ... à faire if {$KindOfClades==""} { if {[OiDomain]=="Archaea"} { set KindOfClades "Archaea" } if {[OiDomain]=="Eukaryota"} { set KindOfClades "CMC" } ; #rR a corriger pour la suite !!!!!!!!!!! 
} Espionne "OiDomain =[OiDomain]= KindOfClades =$KindOfClades=" if {$KindOfClades=="CMC"} { set Domaine "Eukaryota" set LesTaxId [CMC $Clade OX] } elseif {$KindOfClades=="JoyFungi"} { set Domaine "Eukaryota" set LesTaxId [Paraph $KindOfClades $Clade OX] } elseif {$KindOfClades=="Bac8"} { set Domaine "Bacteria" set LesTaxId [Paraph $KindOfClades $Clade OX] } elseif {$KindOfClades=="Bac9"} { set Domaine "Bacteria" set LesTaxId [Paraph $KindOfClades $Clade OX] } elseif {$KindOfClades=="TTLL"} { set Domaine "Eukaryota" set LesTaxId [TTLL $Clade OX] } elseif {$KindOfClades=="Archaea"} { set Domaine "Archaea" set LesTaxId [PhylAr $Clade ListOfOX] } else { set Domaine "Eukaryota" set LesTaxId [CladeContent $Clade] } set LesBons {} foreach TaxId $LesTaxId { #if {[OiCodeForDomain $Domaine $TaxId OI]==""} { continue } if {[OiCode $TaxId OI]==""} { continue } lappend LesBons $TaxId } return $LesBons } proc BlastomicsCreateDb {{Project ""} {KindOfClades ""} {GetWhat ""}} { if {$Project==""} { set Project [file tail [RepertoireDuGenome]] } set Bdd "[BlastomicsDbDir]/${Project}_${KindOfClades}.db" if {[FileExists $Bdd]} { if {[OuiOuNon "$Bdd already exists. 
Do I overwrite ?"]} { Garde $Bdd file delete $Bdd } else { return "Error: Already exists" } } Espionne [CanalSql [ConnInfoForDatabase $Bdd "CreateIfNotExists"]] SqlExec "BEGIN TRANSACTION" SqlExec "DROP TABLE IF EXISTS ln_clade_organism" SqlExec "DROP TABLE IF EXISTS ln_organism_taxobla" SqlExec "DROP TABLE IF EXISTS ln_branche_taxobla" SqlExec "DROP TABLE IF EXISTS branche" SqlExec "DROP TABLE IF EXISTS sapin" SqlExec "DROP TABLE IF EXISTS organism" SqlExec "DROP TABLE IF EXISTS taxobla" SqlExec "DROP TABLE IF EXISTS clade" SqlExec "CREATE TABLE clade (pk_clade INTEGER , nom VARCHAR(15))" SqlExec "CREATE TABLE organism(pk_organism INTEGER , taxid VARCHAR(10), specie VARCHAR(25))" SqlExec "CREATE TABLE taxobla (pk_taxobla INTEGER , gscopeid VARCHAR(10), chemin VARCHAR(20), uniprotaccess VARCHAR(12), uniprotid VARCHAR(15), gene_name VARCHAR(20), gene_id VARCHAR(20))" SqlExec "CREATE TABLE branche (pk_branche INTEGER , pk_sapin INTEGER, id_branche INTEGER, largeur INTEGER)" SqlExec "CREATE TABLE ln_clade_organism (pk_clade INTEGER, pk_organism INTEGER)" SqlExec "CREATE TABLE ln_organism_taxobla (pk_taxobla INTEGER, pk_organism INTEGER)" SqlExec "CREATE TABLE ln_branche_taxobla (pk_branche INTEGER, pk_taxobla INTEGER)" SqlExec "CREATE TABLE sapin (pk_sapin INTEGER, pk_clade INTEGER)" if {0} { SqlExec "CREATE TABLE clade (pk_clade INTEGER PRIMARY KEY AUTOINCREMENT, nom VARCHAR(15))" SqlExec "CREATE TABLE organism(pk_organism INTEGER PRIMARY KEY AUTOINCREMENT, taxid VARCHAR(10), specie VARCHAR(25))" SqlExec "CREATE TABLE taxobla (pk_taxobla INTEGER PRIMARY KEY AUTOINCREMENT, gscopeid VARCHAR(10), chemin VARCHAR(20), uniprotaccess VARCHAR(12), uniprotid VARCHAR(15), gene_name VARCHAR(20), gene_id VARCHAR(20))" SqlExec "CREATE TABLE branche (pk_branche INTEGER PRIMARY KEY AUTOINCREMENT, pk_sapin INTEGER, id_branche INTEGER, largeur INTEGER, FOREIGN KEY(pk_sapin) REFERENCES sapin(pk_sapin))" SqlExec "CREATE TABLE ln_clade_organism (pk_clade INTEGER, pk_organism INTEGER, 
PRIMARY KEY(pk_clade, pk_organism), FOREIGN KEY(pk_clade) REFERENCES clade(pk_clade), FOREIGN KEY(pk_organism) REFERENCES organism(pk_organism))" SqlExec "CREATE TABLE ln_organism_taxobla (pk_taxobla INTEGER, pk_organism INTEGER, PRIMARY KEY(pk_taxobla, pk_organism), FOREIGN KEY(pk_organism) REFERENCES organism(pk_organism), FOREIGN KEY(pk_taxobla) REFERENCES taxobla(pk_taxobla))" SqlExec "CREATE TABLE ln_branche_taxobla (pk_branche INTEGER, pk_taxobla INTEGER, PRIMARY KEY(pk_branche, pk_taxobla), FOREIGN KEY(pk_branche) REFERENCES branche(pk_branche), FOREIGN KEY(pk_taxobla) REFERENCES taxobla(pk_taxobla))" SqlExec "CREATE TABLE sapin (pk_sapin INTEGER PRIMARY KEY AUTOINCREMENT, pk_clade INTEGER, FOREIGN KEY(pk_clade) REFERENCES clade(pk_clade)) " } SqlExec "COMMIT" if {$GetWhat=="GetHandleAndFilename"} { return [list [CanalSql] $Bdd] } if {$GetWhat=="GetHandle" } { return [CanalSql] } CanalSqlDisconnect return $Bdd } proc BlastomicsNewQuery {{Project ""} {KindOfClades ""}} { set Overwrite 1 set RepSql "[RepertoireDuGenome]/blomesql" file mkdir $RepSql lassign [BlastomicsCreateDb $Project $KindOfClades "GetHandleAndFilename"] SqlHandle Bdd if {[regexp "^Error" $SqlHandle]} { return $SqlHandle } set LesFichiersACharger {} if {$Overwrite} { set TaxIdQuery [NotreOX] set ListOfClades [lsort -unique [BlastomicsClades $KindOfClades]] foreach Clade $ListOfClades { Espionne "Clade $Clade" set OrgsFromClade($Clade) [CladeContentWithinOi2017 $Clade $KindOfClades] Espionne $OrgsFromClade($Clade) set Cardinal($Clade) [llength $OrgsFromClade($Clade)] Espionne $Cardinal($Clade) foreach TaxId $OrgsFromClade($Clade) { lappend AllTaxId $TaxId set OsFrom($TaxId) [Tax $TaxId "Name"] lappend LesCladesDuTaxId($TaxId) $Clade } } set AllTaxId [lsort -unique -integer $AllTaxId] set PkClade 0 set SqlPourClade {} foreach Clade $ListOfClades { incr PkClade lappend SqlPourClade "$PkClade\t$Clade" set PkDuClade($Clade) $PkClade } lappend LesFichiersACharger [SauveLesLignes $SqlPourClade dans 
"$RepSql/10_SqlPour-clade.txt"] set PkOrganism 0 set SqlPourOrganism {} foreach TaxId [lsort -integer [array names OsFrom]] { set Species $OsFrom($TaxId) incr PkOrganism lappend SqlPourOrganism "$PkOrganism\t$TaxId\t$Species" set PkDuOrganismTaxId($TaxId) $PkOrganism set PkDuOrganismSpecies($Species) $PkOrganism } lappend LesFichiersACharger [SauveLesLignes $SqlPourOrganism dans "$RepSql/20_SqlPour-organism.txt"] set SqlPourLnCladeOrganism {} foreach Clade $ListOfClades { foreach TaxId $OrgsFromClade($Clade) { set PkOrganism $PkDuOrganismTaxId($TaxId) lappend SqlPourLnCladeOrganism $PkDuClade($Clade)\t$PkOrganism } } lappend LesFichiersACharger [SauveLesLignes $SqlPourLnCladeOrganism dans "$RepSql/30_SqlPour-ln_clade_organism.txt"] set SqlPourSapin {} foreach Clade $ListOfClades { set PkSapin $PkDuClade($Clade) ;#rR je ne sais pas si peut y en avoir d'autres ??? lappend SqlPourSapin "$PkSapin\t$PkDuClade($Clade)" lappend LesPkSapin $PkSapin lappend LesCladesDuSapin($PkSapin) $Clade } lappend LesFichiersACharger [SauveLesLignes $SqlPourSapin dans "$RepSql/40_SqlPour-sapin.txt"] set PkBranche 0 set SqlPourBranche {} foreach PkSapin $LesPkSapin { foreach Clade $LesCladesDuSapin($PkSapin) { set Largeur [llength $OrgsFromClade($Clade)] foreach IdBranche [NombresEntre 0 $Largeur] { incr PkBranche lappend SqlPourBranche $PkBranche\t$PkSapin\t$IdBranche\t$Largeur set PkBrancheDe($PkSapin,$IdBranche) $PkBranche } } } lappend LesFichiersACharger [SauveLesLignes $SqlPourBranche dans "$RepSql/50_SqlPour-branche.txt"] set PkTaxobla 0 set SqlPourTaxobla {} foreach Nom [ListeDesPABs] { incr PkTaxobla set PkTaxoblaDe($Nom) $PkTaxobla set Ac [ExtraitInfo $Nom "AC:"] set Id [ExtraitInfo $Nom "ID:"] set Gn [ExtraitInfo $Nom "ValiGN:"] ; if {$Gn==""} { set Gn [ExtraitInfo $Nom "GN:"] } set Gi [ExtraitInfo $Nom "GI:"] ; if {$Gi==""} { set Gi [NIAG $Nom "G"] } lappend SqlPourTaxobla "$PkTaxobla\t$Nom\ttaxobla\t$Ac\t$Id\t$Gn\t$Gi" } lappend LesFichiersACharger [SauveLesLignes 
$SqlPourTaxobla dans "$RepSql/60_SqlPour-taxobla.txt"] set SqlPourLnOrganismTaxobla {} foreach Nom [ListeDesPABs] { set PkTaxobla $PkTaxoblaDe($Nom) foreach Clade $ListOfClades { set PkClade $PkDuClade($Clade) set NbHitsDansClade($PkTaxobla,$PkClade) 0 } if {[FileAbsent [GscopeFile $Nom "taxobla"]]} { continue } foreach Ligne [LesLignesDuFichier [GscopeFile $Nom "taxobla"]] { scan $Ligne "%f %f %d" Pn Score TaxId if {$Pn>0.001} { break } set Score [expr round($Score)] if { ! [info exists PkDuOrganismTaxId($TaxId)]} { continue } set PkOrganism $PkDuOrganismTaxId($TaxId) lappend SqlPourLnOrganismTaxobla "$PkTaxobla\t$PkOrganism" foreach Clade $LesCladesDuTaxId($TaxId) { set PkClade $PkDuClade($Clade) incr NbHitsDansClade($PkTaxobla,$PkClade) } } } lappend LesFichiersACharger [SauveLesLignes $SqlPourLnOrganismTaxobla dans "$RepSql/70_SqlPour-ln_organism_taxobla.txt"] set SqlPourLnBrancheTaxobla {} foreach {K NbHits} [array get NbHitsDansClade] { lassign [split $K ","] PkTaxobla PkSapin if { ! [info exists PkBrancheDe($PkSapin,$NbHits)]} { Espionne "not existing PkBrancheDe($PkSapin,$NbHits) PkTaxobla $PkTaxobla" continue } set PkBranche $PkBrancheDe($PkSapin,$NbHits) ;#rR NbHits = IdBranche lappend SqlPourLnBrancheTaxobla "$PkBranche\t$PkTaxobla" } lappend LesFichiersACharger [SauveLesLignes $SqlPourLnBrancheTaxobla dans "$RepSql/80_SqlPour-ln_branche_taxobla.txt"] } else { set LesFichiersACharger [lsort [glob "$RepSql/*SqlPour-*"]] } foreach Fichier $LesFichiersACharger { set Queue [file tail $Fichier] if { ! 
[regexp {\-([^\.]+)\.} $Queue Match Table]} { continue } $SqlHandle copy abort $Table $Fichier "\t" Espionne "$Table done" } BlastomicsCreateIndex CanalSqlDisconnect return $Bdd } proc BlastomicsCreateIndex {{Bdd ""}} { if {$Bdd!=""} { CanalSql [ConnInfoForDatabase $Bdd] } SqlExec "CREATE INDEX iclade ON clade (pk_clade, nom)" SqlExec "CREATE INDEX iorganism ON organism (pk_organism, taxid, specie)" SqlExec "CREATE INDEX itaxobla ON taxobla (pk_taxobla, gscopeid)" SqlExec "CREATE INDEX ibranche ON branche (pk_branche)" SqlExec "CREATE INDEX isapin ON sapin (pk_sapin, pk_clade)" SqlExec "CREATE INDEX ilnco ON ln_clade_organism (pk_clade, pk_organism)" SqlExec "CREATE INDEX ilnot ON ln_organism_taxobla (pk_taxobla, pk_organism)" SqlExec "CREATE INDEX ilbt ON ln_branche_taxobla (pk_branche, pk_taxobla)" if {$Bdd!=""} { CanalSqlDisconnect ; return "" } return [CanalSql] } proc BlastomicsClades {{Quoi ""} {GetWhat ""}} { #rR Est appelé par le python Blame ... en question de science ... #rR Merci de me mettre à jour set Possibles [lsort [list Archaea Bac8 Bac9 CMC Cilio TTLL JoyFungi]] set Clades {} if {[regexp -nocase "Possible" $Quoi]} { set Clades $Possibles } if {$Quoi=="Archaea"} { set Clades [PhylAr ListOf PhyOS] } if {$Quoi=="CMC"} { set Clades [CMC ListOf JoinedClade] } if {$Quoi=="JoyFungi"} { set Clades [Paraph $Quoi ListOf JoinedClade] } if {$Quoi=="Bac8"} { set Clades [Paraph $Quoi ListOf JoinedClade] } if {$Quoi=="Bac9"} { set Clades [Paraph $Quoi ListOf JoinedClade] } if {$Quoi=="TTLL"} { set Clades [TTLL ListOf JoinedClade] } if {$Quoi=="Cilio" || [OnTraite "CilioCarta" "Like"]} { if {[OnTraite "CilioCarta"]} { set Clades [BilanCilio ListOf JoinedClade] } else { set Clades [QuestionDeScience "CilioCarta" "ret BilanCilio ListOf JoinedClade"] } } if {$GetWhat=="GetList"} { return [split $Clades " "] } return $Clades }