#rR gscope_circo.tcl #rR All for Circular Code ... proc HideRandomData {} { #rR I moved all random data to have less files in the directory #rR To bring them back simply #rR cd RandomData #rR mv * ../ NousAllonsAuBoulot [RepertoireDuGenome] file mkdir RandomData set LesDeplaces {} foreach CC [CirCode ListOf Index] { if {$CC=="00"} { continue } set LesRandom [lsort [glob -nocomplain "${CC}_*"]] foreach Random $LesRandom { file rename $Random "RandomData/$Random" lappend LesDeplaces $Random } } OnRevientDuBoulot return $LesDeplaces } proc DecortiqueBlastRnaDomain {{FichierBlast ""} {FichierHits ""}} { #rR on cherche les domaines équivalents aux domaines de Ecoli if {$FichierBlast==""} { set FichierBlast "[RepSil]/Ecoli_SSU_domain_5P.blast" } set FichierFasta "[RepSil]/BacSSU_Best_WithoutEmpty_NoGap" if {$FichierHits==""} { set FichierHits $FichierBlast regsub {.blast$} $FichierHits "" FichierHits append FichierHits ".hits" } set Current "" set OnAttendSegAli 1 set OnAttendLeProchainChevron 0 set DebutQuery "" set DebutSbjct "" foreach Ligne [LesLignesDuFichier $FichierBlast] { set YaDatabase [regexp {^ +Database:} $Ligne] set Access "" if {[regexp {^>([^ ]+)} $Ligne Match Access] || $YaDatabase } { set OnAttendLeProchainChevron 0 if {$Current!=""} { set Hit "$Current $DebutQuery $FinQuery $DebutSbjct $FinSbjct" #Espionne $Hit lappend LesHits $Hit } if {$YaDatabase} { break } set Current $Access set DebutQuery "" set DebutSbjct "" set OnAttendSegAli 0 continue } if { $OnAttendSegAli } { continue } if {[regexp "^ Score =" $Ligne] && $DebutQuery!=""} { #Espionne $Ligne ************* $DebutQuery set OnAttendLeProchainChevron 1 } if {$OnAttendLeProchainChevron} { continue } if {[regexp {Query:? +([0-9]+) +[^ ]+ +([0-9]+) *$} $Ligne Match DQ FQ]} { #Espionne $Ligne $DQ $FQ if {$DebutQuery==""} { set DebutQuery $DQ } set FinQuery $FQ } if {[regexp {Sbjct:? 
+([0-9]+) +[^ ]+ +([0-9]+) *$} $Ligne Match DS FS]} { #Espionne $Ligne $DS $FS if {$DebutSbjct==""} { set DebutSbjct $DS } set FinSbjct $FS } } set LaSortie {} foreach Hit $LesHits { lassign [split $Hit " "] Access DQ FQ DS FS set Tfa [LaSequenceDuTFAs $FichierFasta $Access] set Seq [QueLaSequenceDuTexteTFA $Tfa] set SeqDomain [string range $Seq $DS-1 $FS-1] lappend LaSortie "$Hit $SeqDomain" } set S [SauveLesLignes $LaSortie dans $FichierHits] return $S } proc ColiDomain {{Qui ""} {Quoi ""}} { global ColiDomain if {[info exists ColiDomain($Qui,$Quoi)]} { return $ColiDomain($Qui,$Quoi) } if {[info exists ColiDomain("EstCharge")]} { return "" } set ColiDomain("EstCharge") 1 set D "5P" ; lappend ColiDomain(SSU,ListOfDomain) $D ; set ColiDomain($D,limits) "1-559" set D "Central" ; lappend ColiDomain(SSU,ListOfDomain) $D ; set ColiDomain($D,limits) "560-920" set D "3PM" ; lappend ColiDomain(SSU,ListOfDomain) $D ; set ColiDomain($D,limits) "921-1398" set D "3Pm" ; lappend ColiDomain(SSU,ListOfDomain) $D ; set ColiDomain($D,limits) "921-1398" foreach Domain $ColiDomain(SSU,ListOfDomain) { set ColiDomain($Domain,SU) "SSU" } set D "I" ; lappend ColiDomain(LSU,ListOfDomain) $D ; set ColiDomain($D,limits) "1-561" set D "II" ; lappend ColiDomain(LSU,ListOfDomain) $D ; set ColiDomain($D,limits) "587-1250" set D "III" ; lappend ColiDomain(LSU,ListOfDomain) $D ; set ColiDomain($D,limits) "1271-1647" set D "IV" ; lappend ColiDomain(LSU,ListOfDomain) $D ; set ColiDomain($D,limits) "1679-1989" set D "V" ; lappend ColiDomain(LSU,ListOfDomain) $D ; set ColiDomain($D,limits) "2058-2610" set D "VI" ; lappend ColiDomain(LSU,ListOfDomain) $D ; set ColiDomain($D,limits) "2626-2895" foreach Domain $ColiDomain(LSU,ListOfDomain) { set ColiDomain($Domain,SU) "LSU" } set ColiDomain(ListOf,Domain) [concat $ColiDomain(SSU,ListOfDomain) $ColiDomain(LSU,ListOfDomain)] foreach Domain $ColiDomain(ListOf,Domain) { set Limits $ColiDomain($Domain,limits) lassign [split $Limits "-"] Start Stop set 
ColiDomain($Domain,Start) $Start set ColiDomain($Domain,Stop) $Stop } foreach SU [list SSU LSU] { set SeqTotale [QueLaSequenceDuFichierTFA "[RepSil]/Escherichia_coli_${SU}.tfa"] set SeqShift "n$SeqTotale" foreach Domain $ColiDomain($SU,ListOfDomain) { set Seq [string range $SeqShift $ColiDomain($Domain,Start) $ColiDomain($Domain,Stop)] set ColiDomain($Domain,Seq) $Seq } } return [ColiDomain $Qui $Quoi] } proc RepSil {} { return "/home/ripp/3d/sil" } proc RepSilva {} { return "/home/gopal/Silva/DeSilva" return "/genomics/link/ARN/DeSilva" } proc CreeBanqueBlastFromSilva {} { set LeTfa {} foreach {Access SeqGap} [LesLignesDuFichier "[RepSilva]/BacSSU_Best_WithoutEmpty.fasta"] { regsub -all {[\-\.]} $SeqGap "" Seq #Espionne [string length $SeqGap] [string length $Seq] lappend LeTfa $Access $Seq } return [SauveLesLignes $LeTfa dans "[RepSil]/BacSSU_Best_WithoutEmpty_NoGap.tfa"] } proc BlastColiDomains {} { set SU SSU foreach Domain [ColiDomain $SU ListOfDomain] { set Seq [ColiDomain $Domain Seq] set Limits [ColiDomain $Domain limits] set Tfa [SequenceFormatTFA $Seq "Ecoli_${SU}_domain_$Domain $Limits" "nucbrut"] set Fic "[RepSil]/Ecoli_${SU}_domain_$Domain.tfa" set Banque "[RepSil]/BacSSU_Best_WithoutEmpty_NoGap" Sauve $Tfa dans $Fic set Out $Fic regsub {\.tfa$} $Out "" Out append Out ".blast" exec blastall -p blastn -i $Fic -d $Banque -F F -v 20000 -b 20000 -o $Out Espionne $Out } } proc TestColi {} { set SeqGap [QueLaSequenceDuTFAs "[RepSilva]/BacSSU_Best_WithoutEmpty.fasta" "JQ512962.1.1537"] regsub -all {[\-\.]} $SeqGap "" Seq Espionne [string length $SeqGap] [string length $Seq] Espionne $Seq set SeqGapAli [FromMacsim /home/ripp/3d/ali/SSU_rRNA_alignment.xml Sequences Escherichia_coli] regsub -all {[\-\.]} $SeqGapAli "" SeqAli Espionne $SeqAli Espionne [string length $SeqGapAli] [string length $SeqAli] } proc CreateColiSeqs {} { set SeqGapAli [FromMacsim "/home/ripp/3d/ali/SSU_rRNA_alignment.xml" Sequences Escherichia_coli] regsub -all {[\-\.]} $SeqGapAli "" 
SeqAli Espionne $SeqAli Espionne [string length $SeqGapAli] [string length $SeqAli] set Tfa [SequenceFormatTFA $SeqAli "Eschericia_coli_SSU from SSU_rRNA_alignment" "nucbrut"] lappend LesFichiersCrees [Sauve $Tfa dans "[RepSil]/Escherichia_coli_SSU.tfa"] set SeqGapAli [FromMacsim "/home/ripp/3d/ali/LSU_rRNA_alignment.xml" Sequences Escherichia_coli] regsub -all {[\-\.]} $SeqGapAli "" SeqAli Espionne $SeqAli Espionne [string length $SeqGapAli] [string length $SeqAli] set Tfa [SequenceFormatTFA $SeqAli "Eschericia_coli_LSU from LSU_rRNA_alignment" "nucbrut"] lappend LesFichiersCrees [Sauve $Tfa dans "[RepSil]/Escherichia_coli_LSU.tfa"] return $LesFichiersCrees } proc RemoveEmptyPiliers Qui { set FichierGros "[RepSilva]/${Qui}_Best.fasta" set FichierPetit "[RepSilva]/${Qui}_Best_WithoutEmpty.fasta" set Cumul(0) 0 set PremiereFois 1 set LesI {} foreach {Access Sequence} [LesLignesDuFichier $FichierGros] { set Sequence [string map {. -} $Sequence] set I 0 foreach C [split $Sequence ""] { if {$PremiereFois} { lappend LesI $I } #Espionne $C if {$C ne "-"} { #Espionne autre $C incr Nb($I) lappend Owner($I) $Access } incr I } set PremiereFois 0 } Espionne Imax $I foreach I $LesI { if { [info exists Nb($I)]} { incr Cumul($Nb($I)) continue } incr Cumul(0) #Espionne empty $I } foreach N [lsort -integer -decreasing [array names Cumul]] { Espionne $N $Cumul($N) } Espionne [llength $LesI] columns Espionne $Cumul(0) empty colums Espionne [llength [array names Nb]] non empty colums if { ! [OuiOuNon "Ok to create the alignment without empty Piliers ? 
"]} { return "Ok I stopped" } set LeNouveau {} foreach {Access Sequence} [LesLignesDuFichier $FichierGros] { lappend LeNouveau $Access set I 0 set LesI {} set NewSeq "" foreach C [split $Sequence ""] { if {[info exists Nb($I)]} { append NewSeq $C } incr I } lappend LeNouveau $NewSeq } SauveLesLignes $LeNouveau dans $FichierPetit return hopla } proc FastaFromBestSilva {{Qui ""}} { JeMeSignale if {[regexp {All(.SU)} $Qui Match SU]} { set LaSortie {} lappend LaSortie [FastaFromBestSilva "Arc$SU"] lappend LaSortie [FastaFromBestSilva "Bac$SU"] lappend LaSortie [FastaFromBestSilva "Euk$SU"] return $LaSortie } if { ! [regexp {(Arc|Bac|Euk)(.SU)} $Qui Match Dom SU] } { return "(Arc|Bac|Euk)(.SU) est necessaire" } set FichierFasta "[RepSilva]/${SU}_Original.fasta" set FichierAccess "[RepSilva]/${Dom}${SU}_Best.access" set FichierNouveau "[RepSilva]/${Dom}${SU}_Best.fasta" Espionne FichierNouveau $FichierNouveau if {[file exists $FichierNouveau]} { if {[OuiOuNonMemo "Do I keep existing files" 1]} { return "$FichierNouveau already exists" } file delete $FichierNouveau } foreach Ligne [LesLignesDuFichier $FichierAccess] { scan $Ligne "%s" Access set Use($Access) 1 } Espionne [lrange [array names Use] 0 10] Espionne [llength [array names Use]] access lus set Canal [open $FichierFasta "r"] set Save 0 set I -1 while {[gets $Canal Ligne]>-1} { if {[regexp {^(>[^ ]+) } $Ligne Match Access]} { if {[incr I]%10000==0} { Espionne $Access $I } if {$Save} { AppendAuFichier $FichierNouveau $Sequence } set Sequence "" set Save 0 if {[info exists Use($Access)]} { #Espionne info exists Use($Access) [info exists Use($Access)] set Save 1 AppendAuFichier $FichierNouveau $Ligne } continue } if {$Save} { append Sequence $Ligne } } if {$Save} { AppendAuFichier $FichierNouveau $Sequence } return $FichierNouveau #SauveLesLignes $LeTfa dans $FichierNouveau #set LesAccess [LaSequenceDuTFAs $FichierFasta "LaListeDesAccess"] return hihi } proc BestSilva {{Qui ""}} { #rR grep " Archaea;" 
SU_Original.access > ArcSU.access #rR grep " Bacteria;" SU_Original.access > BacSU.access #rR grep " Eukaryiota;" SU_Original.access > EukSU.access if {[regexp {All(.SU)} $Qui Match SU]} { set LaSortie {} lappend LaSortie [BestSilva "Arc$SU"] lappend LaSortie [BestSilva "Bac$SU"] lappend LaSortie [BestSilva "Euk$SU"] return $LaSortie } set LeChoix {} set LesLignes [LesLignesDuFichier "[RepSilva]/${Qui}.access"] set LesLignes [lrange $LesLignes 0 end] foreach Ligne $LesLignes { if { ! [regexp "LSU" $Qui] && ! [regexp {\.1\.} $Ligne]} { continue } #Espionne $Ligne set iSpace [string first " " $Ligne] set ADF [string range $Ligne 0 $iSpace-1] set Tax [string range $Ligne $iSpace+1 end] set LesNoeuds [split $Tax ";"] set Last [lindex $LesNoeuds end] set E "" scan $Last "%s %s" G E if {$E==""} { continue } set GE "$G $E" lappend LesGE $GE lappend LesLignesDeGE($GE) $Ligne lappend LesLignesDeG($G) $Ligne lappend LesGEDeG($G) $GE lappend LesTaxDeGE($GE) $Tax lappend LesTaxDeG($G) $Tax lappend LesG $G } Espionne [llength $LesG] G not unique set LesGE [lsort -unique $LesGE] set LesG [lsort -unique $LesG ] Espionne [llength $LesG] G set MaxIdemG 2 set MaxIdemGE 2 if {$Qui=="ArcSSU"} { set MaxIdemG 9999 ; set MaxIdemGE 9999 } if {$Qui=="BacSSU"} { set MaxIdemG 2 ; set MaxIdemGE 2 } if {$Qui=="EukSSU"} { set MaxIdemG 2 ; set MaxIdemGE 2 } set LesGEok {} foreach G $LesG { set LesGEDeG($G) [lsort -unique $LesGEDeG($G)] set L [llength $LesGEDeG($G)] #Espionne $G $L lappend LesL $L lappend GDeL($L) $G if {$L<=$MaxIdemG} { LConcat LesGEok $LesGEDeG($G) } else { LConcat LesGEok [lrange $LesGEDeG($G) 0 $MaxIdemGE-1] } } set LesL [lsort -unique -decreasing -integer $LesL] foreach L $LesL { #Espionne $L $GDeL($L) } Espionne [llength $LesGEok] GEok foreach GE $LesGEok { set L [llength $LesLignesDeGE($GE)] if {$L<3} { LConcat LeChoix $LesLignesDeGE($GE) } else { set M [expr $L/2] lappend LeChoix [lindex $LesLignesDeGE($GE) 0] [lindex $LesLignesDeGE($GE) $M] [lindex $LesLignesDeGE($GE) 
end] } set Tax [lindex $LesTaxDeGE($GE) 0] #Espionne $Tax set Dom [string range $Tax 0 2] incr Nb($Dom) incr Nb($Dom,lines) $L } Espionne [llength $LeChoix] parray Nb SauveLesLignes $LeChoix dans "[RepSilva]/${Qui}_Best.access" return $Qui return $LesGEok } proc AllCodonsOrdered {} { set LesX {} set LesY {} foreach Codon [AllCodons] { if { [IsX0 $Codon] } { lappend LesX $Codon } else { lappend LesY $Codon } } return [concat $LesX $LesY] } proc CoFreqPourTousPourRandom {} { global EspionneNon set EspionneNon 1 foreach Index [lrange [CirCode ListOf Index] 0 end] { CoFreq Reset IsX0 Reset CirCode DefaultCirCode $Index set CF [CoFreqPourTous] lappend LaSortie $CF } return $LaSortie } proc CoFreqPourTous {} { set AllOrgs [concat [CoFreq ListOf Org] "All"] set DejaEntete 0 foreach Frame [list 0 1 2 A] { foreach Y [list L S] { set SumPCX($Frame,$Y) 0 ; set NbX($Frame,$Y) 0 set SumPCR($Frame,$Y) 0 ; set NbR($Frame,$Y) 0 lappend LaLigne $Y foreach Org $AllOrgs { set NiceOrg [string range $Org 0 2] #Espionne $Frame $Y $Org set LaLigne {} lappend LaLigne $Frame $Y $NiceOrg #lappend LaLigne [format "%-30s" $Org] set LaLigneEntete $LaLigne foreach Codon [AllCodonsOrdered] { if {[IsX0 $Codon]} { set NiceCodon [string toupper $Codon] } else { set NiceCodon [string tolower $Codon] } lappend LaLigneEntete $NiceCodon } if { ! $DejaEntete } { Espionne [join $LaLigneEntete " "] set DejaEntete 1 } foreach Codon [AllCodons] { set PC [CoFreq $Org-$Y-$Frame-PC $Codon] set NicePC [format "%3.1f" $PC] lappend LaLigne $NicePC #incr PCof($Org,$Y,$Frame,$Codon) $PC if {[IsX0 $Codon]} { set SumPCX($Frame,$Y) [expr $SumPCX($Frame,$Y) + $PC] incr NbX($Frame,$Y) } else { set SumPCR($Frame,$Y) [expr $SumPCR($Frame,$Y) + $PC] incr NbR($Frame,$Y) } } Espionne [join $LaLigne " "] } Espionne } Espionne } Espionne set LaLigne {} lappend LaLigne A . 
All set LaLigneEntete {} lappend LaLigneEntete " " " " " " foreach Codon [AllCodonsOrdered] { if {[IsX0 $Codon]} { set NiceCodon [string toupper $Codon] } else { set NiceCodon [string tolower $Codon] } lappend LaLigneEntete $NiceCodon } Espionne [join $LaLigneEntete " "] foreach Codon [AllCodonsOrdered] { #incr PCof($Org,$Y,$Frame,$Codon) $PC set PC [expr ([CoFreq All-L-A-PC $Codon]+[CoFreq All-S-A-PC $Codon])/2] lappend LaLigne [format "%3.1f" $PC] } Espionne [join $LaLigne " "] set MoyPCX [expr ($SumPCX(A,L)+$SumPCX(A,S))/($NbX(A,L)+$NbX(A,S))] set MoyPCR [expr ($SumPCR(A,L)+$SumPCR(A,S))/($NbR(A,L)+$NbR(A,S))] return [format "%-3s X : %5.3f nonX : %5.3f %s" [CirCode DefaultCirCode] $MoyPCX $MoyPCR [CirCode]] } proc CoFreq {{Qui ""} {Quoi ""}} { global CoFreq if {$Qui=="Reset"} { if {[info exists CoFreq]} { unset CoFreq } return } if {$Qui=="" && $Quoi==""} { set Qui "Help" } if {[info exists CoFreq($Qui,$Quoi)]} { return $CoFreq($Qui,$Quoi) } if {[info exists CoFreq("EstCharge")]} { if {[regexp {\-PC$} $Qui]} { return 0.0 } return "" } set CoFreq("EstCharge") 1 set LesOrgs [FromRnaAli ListOf Org] set CoFreq(ListOf,Org) $LesOrgs foreach Org $LesOrgs { foreach Y [list L S] { set Seq [FromRnaAli $Org $Y SeqNoGap] set NC0 0 set NC1 0 set NC2 0 set LaSeq [split $Seq ""] foreach Frame [list 0 1 2] { set LaSeq [lrange [split $Seq ""] $Frame end] foreach {A B C} $LaSeq { set C "$A$B$C" if {$C==""} { break } lappend CoFreq($Org-$Y-$Frame,Codons) $C ; incr CoFreq($Org-$Y-$Frame,$C) lappend CoFreq($Org-$Y-A,Codons) $C ; incr CoFreq($Org-$Y-A,$C) lappend CoFreq(All-$Y-$Frame,Codons) $C ; incr CoFreq(All-$Y-$Frame,$C) lappend CoFreq(All-$Y-A,Codons) $C ; incr CoFreq(All-$Y-A,$C) } set CoFreq($Org-$Y-$Frame,NC) [llength $CoFreq($Org-$Y-$Frame,Codons)] set CoFreq(All-$Y-$Frame,NC) [llength $CoFreq(All-$Y-$Frame,Codons)] } set CoFreq($Org-$Y-A,NC) [llength $CoFreq($Org-$Y-A,Codons)] set CoFreq(All-$Y-A,NC) [llength $CoFreq(All-$Y-A,Codons)] foreach Codon [AllCodons] { if { ! 
[info exists CoFreq($Org-$Y-A,$Codon)]} { set CoFreq($Org-$Y-A,$Codon) 0 } set CoFreq($Org-$Y-A-PC,$Codon) [format "%3.1f" [expr (100.*$CoFreq($Org-$Y-A,$Codon))/$CoFreq($Org-$Y-A,NC)]] } foreach Frame [list 0 1 2] { foreach Codon [AllCodons] { if { ! [info exists CoFreq($Org-$Y-$Frame,$Codon)]} { set CoFreq($Org-$Y-$Frame,$Codon) 0 } set CoFreq($Org-$Y-$Frame-PC,$Codon) [format "%3.1f" [expr (100.*$CoFreq($Org-$Y-$Frame,$Codon))/$CoFreq($Org-$Y-$Frame,NC)]] } } } } if {0} { foreach Org $LesOrgs { foreach Y [list L S] { set Seq [FromRnaAli $Org $Y SeqNoGap] set NC0 0 set NC1 0 set NC2 0 set LaSeq [split $Seq ""] foreach A [lrange $LaSeq 0 end-2] B [lrange $LaSeq 1 end-1] C [lrange $LaSeq 2 end] D [lrange $LaSeq 3 end] E [lrange $LaSeq 4 end] { set C0 "$A$B$C" set C1 "$B$C$D" set C2 "$C$D$E" if {$C!=""} { lappend CoFreq($Org-$Y-0,Codons) $C0 ; incr NC0 ; lappend CoFreq($Org-$Y-A,Codons) $C0 ; incr NC} if {$D!=""} { lappend CoFreq($Org-$Y-1,Codons) $C1 ; incr NC1 ; lappend CoFreq($Org-$Y-A,Codons) $C1 ; incr NC} if {$E!=""} { lappend CoFreq($Org-$Y-2,Codons) $C2 ; incr NC2 ; lappend CoFreq($Org-$Y-A,Codons) $C2 ; incr NC} incr CoFreq($Org-$Y-0,$C0) incr CoFreq($Org-$Y-1,$C1) incr CoFreq($Org-$Y-2,$C2) incr CoFreq($Org-$Y-A,$C0) incr CoFreq($Org-$Y-A,$C1) incr CoFreq($Org-$Y-A,$C2) if {$C!=""} { lappend CoFreq(All-$Y-0,Codons) $C0 ; incr NC0All ; lappend CoFreq(All-$Y-A,Codons) $C0 } if {$D!=""} { lappend CoFreq(All-$Y-1,Codons) $C1 ; incr NC1All ; lappend CoFreq(All-$Y-A,Codons) $C1 } if {$E!=""} { lappend CoFreq(All-$Y-2,Codons) $C2 ; incr NC2All ; lappend CoFreq(All-$Y-A,Codons) $C2 } incr CoFreq(All-$Y-0,$C0) incr CoFreq(All-$Y-1,$C1) incr CoFreq(All-$Y-2,$C2) incr CoFreq(All-$Y-A,$C0) incr CoFreq(All-$Y-A,$C1) incr CoFreq(All-$Y-A,$C2) } set CoFreq($Org-$Y-A,Codons) [lsort -unique $CoFreq($Org-$Y-A,Codons)] set CoFreq($Org-$Y-A,NC) $NC foreach Codon $CoFreq($Org-$Y-A,Codons) { if { ! 
[info exists CoFreq($Org-$Y-A,$Codon)]} { set CoFreq($Org-$Y-A,$Codon) 0 } set CoFreq($Org-$Y-A-PC,$Codon) [expr (100.*$CoFreq($Org-$Y-A,$Codon))/$CoFreq($Org-$Y-A,NC)] } foreach Frame [list 0 1 2] { set CoFreq($Org-$Y-$Frame,Codons) [lsort -unique $CoFreq($Org-$Y-$Frame,Codons)] set CoFreq($Org-$Y-0,NC) $NC0 set CoFreq($Org-$Y-1,NC) $NC1 set CoFreq($Org-$Y-2,NC) $NC2 foreach Codon $CoFreq($Org-$Y-$Frame,Codons) { if { ! [info exists CoFreq($Org-$Y-$Frame,$Codon)]} { set CoFreq($Org-$Y-$Frame,$Codon) 0 } set CoFreq($Org-$Y-$Frame-PC,$Codon) [expr (100.*$CoFreq($Org-$Y-$Frame,$Codon))/$CoFreq($Org-$Y-$Frame,NC)] } } set CoFreq(All-$Y-0,NCAll) $NC0All set CoFreq(All-$Y-1,NCAll) $NC1All set CoFreq(All-$Y-2,NCAll) $NC2All set CoFreq(All-$Y-A,NCAll) [llength $CoFreq(All-$Y-A,Codons)] } } } foreach Y [list L S] { set CoFreq(All-$Y-A,Codons) [lsort -unique $CoFreq(All-$Y-A,Codons)] foreach Codon $CoFreq(All-$Y-A,Codons) { if { ! [info exists CoFreq(All-$Y-A,$Codon)]} { set CoFreq(All-$Y-A,$Codon) 0 } set CoFreq(All-$Y-A-PC,$Codon) [format "%3.1f" [expr (100.*$CoFreq(All-$Y-A,$Codon))/$CoFreq(All-$Y-A,NC)]] } foreach Frame [list 0 1 2 A] { set CoFreq(All-$Y-$Frame,Codons) [lsort -unique $CoFreq(All-$Y-$Frame,Codons)] foreach Codon $CoFreq(All-$Y-$Frame,Codons) { if { ! 
[info exists CoFreq(All-$Y-${Frame},$Codon)]} { set CoFreq(All-$Y-${Frame},$Codon) 0 } set CoFreq(All-$Y-$Frame-PC,$Codon) [format "%3.1f" [expr (100.*$CoFreq(All-$Y-$Frame,$Codon))/$CoFreq(All-$Y-$Frame,NC)]] } } } return [CoFreq $Qui $Quoi] } proc CountXMotifsForAll {{Liste ""}} { if {$Liste==""} { set Liste [list BorbuCDS EcoliCDS JoyScCDS Pabyssi MAMA MmusCDS MGS] } foreach Org $Liste { #Espionne [YCodeFor $Org "GetClosest216"]; continue set Closest [lindex [YCodeFor $Org "GetClosest216"] 0] set CCClosest [expr 1000+$Closest] set LesCC {} set LesValeurs {} foreach CC [list "" TOP20 $CCClosest MFC 20 50 92] { lappend LesCC $CC set Commande "tcsh -c \"setgscoperr $Org -qq ; gscope puts CountXMotifs $CC\"" #Espionne $Commande set Reponse [eval exec $Commande] lassign $Reponse R E lappend LesValeurs $R } #????????????????????? I421-I433 set Yori [eval exec "tcsh -c \"setgscoperr $Org -qq ; gscope puts YCodeFor - GetMFC\""] set Top20 [eval exec "tcsh -c \"setgscoperr $Org -qq ; gscope puts YCodeFor - Top20\""] #Espionne $E $LesValeurs set Message [format "%6d : %6d / %6d / %6d / %6d / %6d / %6d / %6d %-10s Top20 : %s MFC : %s" $E {*}$LesValeurs $Org $Top20 $Yori] Espionne $Message lappend LaSortie $Message } return $LaSortie } proc CountXMotifs {{CC ""}} { if {$CC!=""} { CirCode DefaultCirCode $CC } set NbTotal 0 set TotalExpectation 0 set TotalSeqLength 0 foreach Nom [ListeDesPABs] { foreach Fic [glob -nocomplain "[RepertoireDuGenome]/nuctfa/$Nom*"] { set Nb 0 set Seq [QueLaSequenceDuFichierTFA $Fic] set SeqLength [string length $Seq] incr TotalSeqLength $SeqLength set LesX [XMotifs $Seq] set Expectation [expr ($SeqLength/3 - 11)*(20./61)*(19./61)*(18./61)*(17./61)] set TotalExpectation [expr $TotalExpectation+$Expectation] foreach {LC Hits} $LesX { incr Nb [llength $Hits] } incr NbTotal $Nb } } set TotalExpectation [expr round($TotalExpectation)] set TotalExpectationGenome [expr round(($TotalSeqLength/3 - 11)*(20./61)*(19./61)*(18./61)*(17./61))] #Espionne [format 
"%-8s %6d %s %6d %s %6d" [file tail [RepertoireDuGenome]] $NbTotal TotalExpectation $TotalExpectation "... and for the genome" $TotalExpectationGenome] return "$NbTotal $TotalExpectation" } proc YCodeFor {{Org ""} {GetWhat ""}} { if {$Org=="" || $Org=="-"} { set Org [file tail [RepertoireDuGenome]] } set MfcFile "/home/ripp/3d/MoreFrequentCodonsSamples.txt" set LesLignes [LesLignesDuFichier $MfcFile] set i [lsearch $LesLignes "/genomics/link/$Org/nuctfa"] if {$i<0 && [regexp {^Joy} $Org]} { set i [lsearch $LesLignes "/genomics/link/Joy/$Org/nuctfa"] } if {$i<0} { set i [lsearch $LesLignes "$Org"] } if {$i<0} { set MfcFile "/home/ripp/3d/MoreFrequentCodonsMGS.txt" set LesLignes [LesLignesDuFichier $MfcFile] set i [lsearch $LesLignes "$Org"] } set LesInfos [lrange $LesLignes $i+1 $i+11] lassign $LesInfos C0 C1 C2 Bidon X0 X1 X2 S0 S1 S2 Top set Top20 [lrange [split $Top " "] 1 20] set TOP20 [string toupper $Top20] if {$GetWhat=="Top20"} { return $Top20 } if {$GetWhat=="TOP20"} { return $TOP20 } set LesY0 [lrange [split $C0 " "] 2 end] set LesY1 [lrange [split $C1 " "] 2 end] set LesY2 [lrange [split $C2 " "] 2 end] regsub -all { +} $S0 " " S0 regsub -all { +} $S1 " " S1 regsub -all { +} $S2 " " S2 foreach x0 [split $X0 " "] x1 [split $X1 " "] x2 [split $X2 " "] s0 [split $S0 " "] s1 [split $S1 " "] s2 [split $S2 " "] { if {$x0==0} { continue } if {$s0=="%"} { set n0 $x0; set n1 $x1; set n2 $x2; continue } if {$x0=="///"} { continue } if {$x0!="..."} { set Codon $x0 } if {$x1!="..."} { set Codon $x1 } if {$x2!="..."} { set Codon $x2 } set FC0($Codon) $s0 set FC1($Codon) $s1 set FC2($Codon) $s2 } set LesY {} foreach Y $LesY0 { set Yoriginal $Y set Y [string toupper $Y] if {$Y=="AAA" || $Y=="CCC" || $Y=="GGG" || $Y=="TTT"} { continue } lappend LesYoriginaux $Yoriginal lappend LesY $Y set IsY($Y) 1 } set MaxN 0 foreach Index [C216 ListOf Index] { set N 0 foreach Codon [C216 $Index] { if {[info exists IsY($Codon)]} { incr N } } set MaxN [Maxi $N $MaxN] lappend IndexOf($N) 
$Index #Espionne $N $Index } #Espionne $MaxN $IndexOf($MaxN) if {$GetWhat=="GetClosest216"} { return $IndexOf($MaxN) } if {$GetWhat=="GetMFC"} { return $LesYoriginaux } return $LesY } proc MoreFrequentCodonsPourMGS {} { NousAllonsAuBoulot "/genomics/link/MGS" set RepData "DataMoreFrequentCodons" file mkdir $RepData set LesRep [lsort [glob "nuctfa_*"]] set LaSortie {} foreach Rep $LesRep { regsub "nuctfa_" $Rep "" Organism set Stats [MoreFrequentCodons $Rep "$RepData/Data_MGS_$Organism.txt" $Organism] LConcat LaSortie $Stats EspionneL $Stats } set Bilan [SauveLesLignes $LaSortie dans "/home/ripp/3d/MoreFrequentCodonsMGS.txt"] OnRevientDuBoulot return $Bilan } proc MoreFrequentCodonsPourTous {} { set LesProjets [list MGS MAMA EcoliCDS BorbuCDS Pabyssi Aful Mjan Joy/JoyDhCDS Joy/JoyKcCDS Joy/JoyKlCDS Joy/JoyLeCDS Joy/JoyMgCDS Joy/JoyScCDS Joy/JoySpCDS Joy/JoySsCDS Joy/JoyYlCDS] set LaSortie {} foreach Projet $LesProjets { set Stats [MoreFrequentCodons "/genomics/link/$Projet/nuctfa"] LConcat LaSortie $Stats EspionneL $Stats } set AllTogether [MoreFrequentCodons $LesProjets] LConcat LaSortie $Stats #return $AllTogether return [SauveLesLignes $LaSortie dans "/home/ripp/3d/MoreFrequentCodonsSamples.txt"] } proc MoreFrequentCodonsPourJoy {} { set LesProjets [lrange [lsort [glob "/genomics/link/Joy/*CDS"]] 0 end] set LaSortie {} foreach Projet $LesProjets { set Stats [MoreFrequentCodons "$Projet/nuctfa"] LConcat LaSortie $Stats EspionneL $Stats } set AllTogether [MoreFrequentCodons $LesProjets] LConcat LaSortie $AllTogether #return $AllTogether return [SauveLesLignes $LaSortie dans "/home/ripp/3d/MoreFrequentCodonsJoy.txt"] } proc XCodons {Frame} { foreach Codon [CirCode] { set Codon1 "[string range $Codon 1 end][string index $Codon 0]" set Codon2 "[string range $Codon1 1 end][string index $Codon1 0]" lappend Frame0 $Codon lappend Frame1 $Codon1 lappend Frame2 $Codon2 } return [set Frame$Frame] } proc MoreFrequentCodons {{RepNucTfa ""} {FichierData ""} {Organism ""}} { if 
{$RepNucTfa==""} { set RepNucTfa "[RepertoireDuGenome]/nuctfa" if {$Organims==""} { set Organism [NotreOS] } } else { if {$Organism==""} { set Organism [file tail [file dirname $RepNucTfa]] } } #Espionne $Organism if {$FichierData==""} { set FichierData "/home/ripp/3d/Data_$Organism.txt" regsub -all " " $FichierData "_" FichierData } if {$RepNucTfa=="All"} { set Organism "All" set LesNucTfa [lsort [concat \ [glob /genomics/link/MAMA/nuctfa/*] \ [glob /genomics/link/EcoliCDS/nuctfa/*] \ [glob /genomics/link/Joy/Joy*CDS/nuctfa/*] \ ]] } elseif {[llength $RepNucTfa]>1} { set Organism [join $RepNucTfa " "] set LesNucTfa {} foreach Projet $RepNucTfa { LConcat LesNucTfa [glob /genomics/link/$Projet/nuctfa/*] } } elseif {$RepNucTfa=="AllJoy"} { set Organism "AllJoy" set LesNucTfa [lsort [concat \ [glob /genomics/link/Joy/Joy*CDS/nuctfa/*] \ ]] } else { set LesNucTfa [lsort [glob -nocomplain "$RepNucTfa/*"]] } foreach Codon [AllCodons] { set TotNb(0,$Codon) 0 set TotNb(1,$Codon) 0 set TotNb(2,$Codon) 0 set TotNb($Codon) 0 #if {$Codon=="AAA" || $Codon=="CCC" || $Codon=="GGG" || $Codon=="TTT"} { continue } lappend LesCodons $Codon } set TotMes(0) {} set TotMes(1) {} set TotMes(2) {} set LesData {} foreach FicNuc $LesNucTfa { set Nom [file tail $FicNuc] if {[regexp {^(TROU|TRNA|rRNA|ncRNA)} $Nom]} { continue } foreach Codon [AllCodons] { set Nb(0,$Codon) 0 set Nb(1,$Codon) 0 set Nb(2,$Codon) 0 set Nb($Codon) 0 } set Mes(0) {} set Mes(1) {} set Mes(2) {} set Seq [QueLaSequenceDuFichierTFA $FicNuc] set Seq [string toupper $Seq] regsub {^(ATG|TTG|GTG)} $Seq "" Seq regsub {(TAA|TAG|TGA)$} $Seq "" Seq foreach {A B C} [split $Seq ""] { set Codon "$A$B$C" lappend MesCodons(0) $Codon incr Nb(0,$Codon) incr Nb($Codon) incr TotNb(0,$Codon) incr TotNb($Codon) } foreach {A B C} [split [string range $Seq 1 end-2] ""] { set Codon "$A$B$C" lappend MesCodons(1) $Codon incr Nb(1,$Codon) incr Nb($Codon) incr TotNb(1,$Codon) incr TotNb($Codon) } foreach {A B C} [split [string range $Seq 2 
end-1] ""] { set Codon "$A$B$C" lappend MesCodons(2) $Codon incr Nb(2,$Codon) incr Nb($Codon) incr TotNb(2,$Codon) incr TotNb($Codon) } set LesDataParSeq {} set Count 0 foreach Codon [AllCodons] { lappend LesDataParSeq $Codon $Nb($Codon) $Nb(0,$Codon) $Nb(1,$Codon) $Nb(2,$Codon) incr Count $Nb($Codon) if {$Nb($Codon)==0} { set Pc(0,$Codon) 0 set Pc(1,$Codon) 0 set Pc(2,$Codon) 0 } else { set Pc(0,$Codon) [expr (100*$Nb(0,$Codon))/$Nb($Codon)] set Pc(1,$Codon) [expr (100*$Nb(1,$Codon))/$Nb($Codon)] set Pc(2,$Codon) [expr (100*$Nb(2,$Codon))/$Nb($Codon)] } if {$Nb(0,$Codon)> $Nb(1,$Codon) && $Nb(0,$Codon)> $Nb(2,$Codon)} { lappend Mes(0) $Codon continue } if {$Nb(1,$Codon)>=$Nb(0,$Codon) && $Nb(1,$Codon)> $Nb(2,$Codon)} { lappend Mes(1) $Codon continue } if {$Nb(2,$Codon)>=$Nb(0,$Codon) && $Nb(2,$Codon)>=$Nb(1,$Codon)} { lappend Mes(2) $Codon continue } } lappend LesData "$Nom $Count [join $LesDataParSeq { }]" #lappend LaSortie "$Nom" #lappend LaSortie "0 [llength $Mes(0)] $Mes(0) #lappend LaSortie "1 [llength $Mes(1)] $Mes(1) #lappend LaSortie "2 [llength $Mes(2)] $Mes(2) #rR Fin boucle sur Nom } foreach Codon $LesCodons { if {$TotNb($Codon)==0} { set TotPc(0,$Codon) 0 set TotPc(1,$Codon) 0 set TotPc(2,$Codon) 0 set TotPx(0,$Codon) 0. set TotPx(1,$Codon) 0. set TotPx(2,$Codon) 0. } else { set TotPc(0,$Codon) [expr (100*$TotNb(0,$Codon))/$TotNb($Codon)] set TotPc(1,$Codon) [expr (100*$TotNb(1,$Codon))/$TotNb($Codon)] set TotPc(2,$Codon) [expr (100*$TotNb(2,$Codon))/$TotNb($Codon)] set TotPx(0,$Codon) [expr (100.*$TotNb(0,$Codon))/$TotNb($Codon)] set TotPx(1,$Codon) [expr (100.*$TotNb(1,$Codon))/$TotNb($Codon)] set TotPx(2,$Codon) [expr (100.*$TotNb(2,$Codon))/$TotNb($Codon)] } if {$TotNb(0,$Codon)> $TotNb(1,$Codon) && $TotNb(0,$Codon)> $TotNb(2,$Codon)} { set IsFrame(0,$Codon) 1 if { ! 
[IsX0 $Codon]} { set Codon [string tolower $Codon]} lappend TotMes(0) $Codon continue } if {$TotNb(1,$Codon)>=$TotNb(0,$Codon) && $TotNb(1,$Codon)> $TotNb(2,$Codon)} { set IsFrame(1,$Codon) 1 if { ! [IsX0 $Codon]} { set Codon [string tolower $Codon] } lappend TotMes(1) $Codon continue } if {$TotNb(2,$Codon)>=$TotNb(0,$Codon) && $TotNb(2,$Codon)>=$TotNb(1,$Codon)} { set IsFrame(2,$Codon) 1 if { ! [IsX0 $Codon]} { set Codon [string tolower $Codon] } lappend TotMes(2) $Codon continue } } set LesD {} foreach FrameCodon [array names TotPx "0,*"] { #Espionne $FrameCodon regsub {0\,} $FrameCodon "" Codon set N0 $TotPx(0,$Codon) set N1 $TotPx(1,$Codon) set N2 $TotPx(2,$Codon) set D [expr $N0-[Maxi $N1 $N2]] lappend LesD $D lappend LesCodonsDe($D) $Codon } set LesD [lsort -unique -real -decreasing $LesD] set Top60 {} foreach N $LesD { foreach Codon $LesCodonsDe($N) { if { ! [regexp {[ATGC]{3}} $Codon] } { continue } #Espionne $N $Codon lappend Top64 $Codon if {$Codon=="AAA" || $Codon=="CCC" || $Codon=="GGG" || $Codon=="TTT"} { continue } lappend TOP60 $Codon } } set Top60 {} foreach Codon $TOP60 { if { ! [IsX0 $Codon]} { set Codon [string tolower $Codon] } lappend Top60 $Codon } set Top20 [lrange $Top60 0 19] lappend LaSortie "" lappend LaSortie $Organism lappend LaSortie "0 [llength $TotMes(0)] $TotMes(0)" lappend LaSortie "1 [llength $TotMes(1)] $TotMes(1)" lappend LaSortie "2 [llength $TotMes(2)] $TotMes(2)" lappend LaSortie "" foreach Frame [list 0 1 2] { set LesT {} foreach Codon [XCodons 0] { set T "..." if {[info exists IsFrame($Frame,$Codon)]} { set T $Codon } lappend LesT $T } lappend LesT "///" foreach Codon [XCodons 1] { set T "..." if {[info exists IsFrame($Frame,$Codon)]} { set T $Codon } lappend LesT $T } lappend LesT "///" foreach Codon [XCodons 2] { set T "..." if {[info exists IsFrame($Frame,$Codon)]} { set T $Codon } lappend LesT $T } lappend LesT "///" foreach Codon [list AAA CCC GGG TTT] { set T "..." 
if {[info exists IsFrame($Frame,$Codon)]} { set T $Codon }
            lappend LesT $T
        }
        lappend LaSortie "$Frame [llength $TotMes($Frame)] [join $LesT { }]"
    }
    foreach Frame [list 0 1 2] {
        set LesT {}
        foreach Codon [XCodons 0] {
            set T [format "%3d" $TotPc($Frame,$Codon)]
            lappend LesT $T
        }
        lappend LesT "///"
        foreach Codon [XCodons 1] {
            set T [format "%3d" $TotPc($Frame,$Codon)]
            lappend LesT $T
        }
        lappend LesT "///"
        foreach Codon [XCodons 2] {
            set T [format "%3d" $TotPc($Frame,$Codon)]
            lappend LesT $T
        }
        lappend LesT "///"
        foreach Codon [list AAA CCC GGG TTT] {
            set T [format "%3d" $TotPc($Frame,$Codon)]
            lappend LesT $T
        }
        lappend LaSortie "$Frame % [join $LesT { }]"
    }
    lappend LaSortie "Tp20 [join $Top20 { }] 21-60 [join [lrange $Top60 20 end] { }]"
    #SauveLesLignes $LesData dans $FichierData
    return $LaSortie
}

# TunnelThermus
#   Maps a fixed list of "start-end" ranges through
#   OverlapRna L <start> <end> Thermus_thermophilus "" GetLimits
#   and returns two text lines: the input ranges tagged Escherichia_coli and
#   the mapped ranges tagged Thermus_thermophilus.
proc TunnelThermus {} {
    set LesT [list 15-37 52-76 84-100 445-520 562-590 669-690 735-760 765-811 1247-1292 1307-1341 1601-1620 1764-1790 1935-1945 2005-2020 2046-2075 2435-2458 2490-2512 2546-2560 2570-2620]
    set LaSortie {}
    foreach T $LesT {
        lassign [split $T "-"] D F
        set LesDestin [OverlapRna L $D $F Thermus_thermophilus "" "GetLimits"]
        # Only the first blank is replaced, turning "start end" into "start-end"
        regsub " " $LesDestin "-" LesDestin
        lappend LaSortie $LesDestin
    }
    return "[join $LesT { }] Escherichia_coli\n[join $LaSortie { }] Thermus_thermophilus"
}

# A2M Qui
#   Lazily filled lookup (global array A2M) between RnaMotif identifiers ("A")
#   and positional names M1, M2, ... ("M") numbered by increasing RnaMotif Start.
#   Identifiers containing a lower-case letter get a lower-case name (m1, ...)
#   and go to the lower-case lists (ma am lesm lesa); the others go to
#   MA AM lesM lesA. horiA and horiM hold two-line horizontal tables.
#   Returns A2M(Qui), or "" when Qui is unknown once the array is loaded.
proc A2M {{Qui ""}} {
    global A2M
    if {[info exists A2M($Qui)]} { return $A2M($Qui) }
    if {[info exists A2M("EstCharge")]} { return "" }
    set A2M("EstCharge") 1

    # Every accumulator starts empty, so the sorts below cannot fail with
    # "no such element in array" when one case class has no motif at all.
    foreach K [list ma am lesm lesa MA AM lesM lesA] { set A2M($K) {} }

    foreach Y [list S L] {
        set LesStart {}
        foreach A [RnaMotif ListOf ${Y}Id] {
            set Start [RnaMotif $A Start]
            lappend AduStart($Start) $A
            lappend LesStart $Start
        }
        # Numbering restarts at 1 for each of S and L
        set I 0
        foreach Start [lsort -integer $LesStart] {
            incr I
            set M M$I
            set A $AduStart($Start)
            if {[regexp {[a-z]} $A]} {
                set M [string tolower $M]
                lappend A2M(ma) [format "%-3s %-3s" $M $A]
                lappend A2M(am) [format "%-3s %-3s" $A $M]
                lappend A2M(lesm) $M
                lappend A2M(lesa) $A
            } else {
                lappend A2M(MA) [format "%-3s %-3s" $M $A]
                lappend A2M(AM) [format "%-3s %-3s" $A $M]
                lappend A2M(lesM) $M
                lappend A2M(lesA) $A
            }
            set A2M($A) $M
            set A2M($M) $A
        }
    }
    set A2M(lesm) [lsort -dictionary $A2M(lesm)]
    set A2M(lesM) [lsort -dictionary $A2M(lesM)]
    set A2M(lesa) [lsort -dictionary $A2M(lesa)]
    set A2M(lesA) [lsort -dictionary $A2M(lesA)]
    foreach K [list am ma AM MA] {
        set A2M($K) [lsort -dictionary $A2M($K)]
    }

    # Horizontal table ordered by identifier: lower-case first, then upper-case
    set L1 {}
    set L2 {}
    foreach A $A2M(lesa) {
        set M $A2M($A)
        lappend L1 [format "%-3s" $A]
        lappend L2 [format "%-3s" $M]
    }
    lappend L1 " "
    lappend L2 " "
    foreach A $A2M(lesA) {
        set M $A2M($A)
        lappend L1 [format "%-3s" $A]
        lappend L2 [format "%-3s" $M]
    }
    set A2M(horiA) "[join $L1 { }]\n[join $L2 { }]"

    # Same table ordered by positional name
    set L1 {}
    set L2 {}
    foreach M $A2M(lesm) {
        set A $A2M($M)
        lappend L1 [format "%-3s" $M]
        lappend L2 [format "%-3s" $A]
    }
    lappend L1 " "
    lappend L2 " "
    foreach M $A2M(lesM) {
        set A $A2M($M)
        lappend L1 [format "%-3s" $M]
        lappend L2 [format "%-3s" $A]
    }
    set A2M(horiM) "[join $L1 { }]\n[join $L2 { }]"
    return [A2M $Qui]
}

proc AllAboutMoreFrequentCodonsMGS {} {
    set LaSortie {}
    lappend LaSortie "see Stats about More Frequent Codon in Frames for MGS"
    lappend LaSortie "
\n$Sortie\n"
    return $Html
}

# AllCoMa Qui Quoi Frame CirCode
#   Lazily loads the per-CirCode CodonMatrixHuman files into the single global
#   array AllCoMa, keyed "<key from file>,<CC>", and returns the entry
#   AllCoMa(Frame,QUI,QUOI,CirCode) (Qui and Quoi are upper-cased).
#   When CirCode is "" and the array is already loaded, returns two lines:
#   the CirCode indices (all but the first) and the matching values,
#   each formatted on 6 columns.
proc AllCoMa {{Qui ""} {Quoi ""} {Frame ""} {CirCode ""}} {
    #rR Here we load all CodonMatrix array in one big array AllCoMa.
    global AllCoMa
    set Qui  [string toupper $Qui]
    set Quoi [string toupper $Quoi]
    if {[info exists AllCoMa($Frame,$Qui,$Quoi,$CirCode)]} { return $AllCoMa($Frame,$Qui,$Quoi,$CirCode) }
    if {[info exists AllCoMa("EstCharge")]} {
        if {$CirCode==""} {
            # Both accumulators start empty (LeH was never initialised before,
            # which failed when there was no index after the first one)
            set LeH {}
            set LeR {}
            foreach CC [lrange [CirCode ListOf Index] 1 end] {
                lappend LeH [format "%-6s" $CC]
                lappend LeR [format "%-6s" [AllCoMa $Qui $Quoi $Frame $CC]]
            }
            return [join $LeH " "]\n[join $LeR " "]
        }
        return ""
    }
    set AllCoMa("EstCharge") 1
    set AllCoMa(LISTOF,CC) [CirCode ListOf Index]
    foreach CC [CirCode ListOf Index] {
        set CodonMatrixFile [CodonMatrixFileHuman $CC]
        if {[FileAbsent $CodonMatrixFile]} { continue }
        set CM [ContenuDuFichier $CodonMatrixFile]
        foreach {K V} $CM {
            # Debug trace kept from the original
            if {[regexp "AAC,AAG" $K]} { Espionne $K $V }
            set AllCoMa($K,$CC) $V
        }
    }
    return [AllCoMa $Qui $Quoi $Frame $CirCode]
}

# CreateAllImagesForAllPdfTypes
#   Runs CreateAllImagesFor on every pdf type found by
#   PossibleFichesGraphPdfTypes and collects the results.
proc CreateAllImagesForAllPdfTypes {} {
    set LesImagesCreees {}
    foreach PdfType [PossibleFichesGraphPdfTypes] {
        # Append to the list itself. The original "lappend $LesImagesCreees"
        # appended to a variable named after the (empty) list value.
        lappend LesImagesCreees [CreateAllImagesFor $PdfType]
    }
    set ImagesCreees [join $LesImagesCreees "\n"]
    # NOTE(review): the returned literal below looks truncated in this copy of
    # the file (ImagesCreees is never used); it is kept exactly as found.
    return "
$LesImagesCreees"
}

# PossibleFichesGraphPdfTypes
#   Returns the sorted unique file names (tail only) of every
#   [RepertoireDuGenome]/*_FichesGraph/*.pdf
proc PossibleFichesGraphPdfTypes {} {
    set LesPossibles [glob -nocomplain "[RepertoireDuGenome]/*_FichesGraph/*.pdf"]
    set LesTypes {}
    foreach Possible $LesPossibles {
        lappend LesTypes [file tail $Possible]
    }
    set LesTypes [lsort -unique $LesTypes]
    return $LesTypes
}

# CreateAllImagesFor PdfName
#   Converts <CC>_FichesGraph/<PdfName>.pdf of every CirCode directory into
#   AllImages/<PdfName>/<CC>_<PdfName>.png with ImageMagick "convert", then
#   runs Photorama in the image directory.
#   PdfName may carry the .pdf extension; when empty the user is asked.
#   Returns the created image files joined by newlines, "" if nothing to do.
proc CreateAllImagesFor {{PdfName ""}} {
    if {$PdfName==""} {
        set LesTypes [PossibleFichesGraphPdfTypes]
        set PdfName [ChoixParmi $LesTypes]
        if {$PdfName==""} { return "" }
    }
    # Escaped dot: only a real ".pdf" suffix is removed
    regsub {\.pdf$} $PdfName "" PdfName
    set LesPdf [lsort -dictionary [glob -nocomplain "[RepertoireDuGenome]/*_FichesGraph/$PdfName.pdf"]]
    if {$LesPdf=={}} { return "" }
    file mkdir "[RepertoireDuGenome]/AllImages"
    set ImageDir "[RepertoireDuGenome]/AllImages/$PdfName"
    file mkdir $ImageDir
    set LesImagesCreees {}
    foreach Pdf $LesPdf {
        Espionne $Pdf
        set DirName  [file dirname $Pdf]
        set DirQueue [file tail $DirName]
        if { ! [regexp {^[0-9]+_} $DirQueue CCU]} {
            FaireLire "No CirCodeUnderscore 00_ in $Pdf"
            # Without a prefix CCU is unset (or left over from the previous
            # pdf, which would overwrite that image): skip this file.
            continue
        }
        set ImageFile "$ImageDir/$CCU$PdfName.png"
        exec convert -background white -flatten $Pdf $ImageFile
        Espionne $ImageFile
        lappend LesImagesCreees $ImageFile
    }
    set Titre [file tail $ImageDir]
    NousAllonsAuBoulot $ImageDir
    LogWscope "exec Photorama $Titre"
    exec Photorama $Titre
    OnRevientDuBoulot
    return [join $LesImagesCreees "\n"]
}

# Manual test: prints the column-normalised codon matrix of CirCodes 03 and 04, twice, then exits.
proc TestCC1 {} {
    Espionne [PrintCodonMatrix 0 NormColumn 03]
    Espionne [PrintCodonMatrix 0 NormColumn 04]
    Espionne [PrintCodonMatrix 0 NormColumn 03]
    Espionne [PrintCodonMatrix 0 NormColumn 04]
    exit
}

# Manual test: calls CodonMatrix, dumps the global array of the same name, then exits.
proc TestCC2 {} {
    global CodonMatrix
    CodonMatrix
    parray CodonMatrix
    exit
}

# Manual test: prints the default CirCode, sets it to 22, prints it again, then exits.
proc TestCC3 {} {
    puts [CirCode DefaultCirCode]
    CirCode DefaultCirCode 22
    puts [CirCode DefaultCirCode]
    exit
}

# Directory of the X0 motif features for CirCode CC (default CirCode when CC is "").
proc X0MotifsFeaturesDir {{CC ""}} {
    if {$CC==""} { set CC [CirCode "DefaultCirCode"] }
    return "[RepertoireDuGenome]/${CC}_X0MotifsFeatures"
}

# Directory of the X1 motif features for CirCode CC (default CirCode when CC is "").
proc X1MotifsFeaturesDir {{CC ""}} {
    if {$CC==""} { set CC [CirCode "DefaultCirCode"] }
    return "[RepertoireDuGenome]/${CC}_X1MotifsFeatures"
}

proc X2MotifsFeaturesDir {{CC ""}} {
if {$CC==""} { set CC [CirCode "DefaultCirCode"] }
    return "[RepertoireDuGenome]/${CC}_X2MotifsFeatures"
}

# The helpers below build the per-CirCode directory or file name
# "[RepertoireDuGenome]/<CC>_<suffix>". Those taking CC fall back to
# [CirCode DefaultCirCode] when CC is "".

# Directory <CC>_macsimRsf
proc MacsimRsfDir {{CC ""}} {
    if {$CC==""} { set CC [CirCode "DefaultCirCode"] }
    return "[RepertoireDuGenome]/${CC}_macsimRsf"
}

# Directory <CC>_macsimXml
proc MacsimXmlDir {{CC ""}} {
    if {$CC==""} { set CC [CirCode "DefaultCirCode"] }
    return "[RepertoireDuGenome]/${CC}_macsimXml"
}

# Directory <CC>_SumOfPairs
proc SumOfPairsDir {{CC ""}} {
    if {$CC==""} { set CC [CirCode "DefaultCirCode"] }
    return "[RepertoireDuGenome]/${CC}_SumOfPairs"
}

# Directory <CC>_SumOfPairsForCodons (always the default CirCode)
proc SumOfPairsForCodonsDir {} {
    set CC [CirCode "DefaultCirCode"]
    return "[RepertoireDuGenome]/${CC}_SumOfPairsForCodons"
}

# Directory <CC>_SumOfPairsForCodonsHuman (always the default CirCode)
proc SumOfPairsForCodonsHumanDir {} {
    set CC [CirCode "DefaultCirCode"]
    return "[RepertoireDuGenome]/${CC}_SumOfPairsForCodonsHuman"
}

# Directory <CC>_FichesGraph (always the default CirCode)
proc FichesGraphDir {} {
    set CC [CirCode "DefaultCirCode"]
    return "[RepertoireDuGenome]/${CC}_FichesGraph"
}

# File <CC>_CodonMatrix.txt
proc CodonMatrixFile {{CC ""}} {
    if {$CC==""} { set CC [CirCode "DefaultCirCode"] }
    return "[RepertoireDuGenome]/${CC}_CodonMatrix.txt"
}

# File <CC>_CodonMatrixHuman.txt
proc CodonMatrixFileHuman {{CC ""}} {
    if {$CC==""} { set CC [CirCode "DefaultCirCode"] }
    return "[RepertoireDuGenome]/${CC}_CodonMatrixHuman.txt"
}

# File <CC>_AllInMotif.txt
proc AllInMotifFile {{CC ""}} {
    if {$CC==""} { set CC [CirCode "DefaultCirCode"] }
    return "[RepertoireDuGenome]/${CC}_AllInMotif.txt"
}

# PossiblesOrganismsInStartFile FichierStart
#   Returns the organisms (second "_"-separated field of the ">" headers) in
#   order of first appearance; reading stops at the first repeated organism.
proc PossiblesOrganismsInStartFile {FichierStart} {
    set LesOrganismesPossibles {}
    # The original called "LesLignesDuFicher", a misspelling of the reader
    # used everywhere else in this file.
    foreach Ligne [LesLignesDuFichier $FichierStart] {
        if {[regexp {>[^_]+_([^_]+)_} $Ligne Match Org]} {
            if {[info exists DejaVu($Org)]} { break }
            set DejaVu($Org) 1
            lappend LesOrganismesPossibles $Org
        }
    }
    return $LesOrganismesPossibles
}

# CreateMAMAsProject NewName FichierStart Prefixe Reference
#   Interactively creates the gscope project /gstock/<NewName> (linked as
#   /genomics/link/<NewName>) from an exon alignment fasta file, or from an
#   existing nucalitfa directory when FichierStart is "UseNucAliTfa".
#   Reference selects the reference organism among [WantedOrganisms]
#   (empty means index 0, i.e. hg38).
#   Returns the message telling how to continue with CompleteMAMAsProject.
proc CreateMAMAsProject {{NewName ""} {FichierStart ""} {Prefixe ""} {Reference ""}} {
    #rR This has to be run with
    #rR    setgscoperr Zero
    #rR    gscope CreateMAMAsProject MAMI /home/ripp/circo/knownCanonical.exonNuc.fa mami
    #rR    gscope CreateMAMAsProject MUMU /home/ripp/circo/knownCanonical.exonNuc.fa MUMU mm10
    #rR then setgscoperr MAMI
    #rR    gscope CompleteMAMAsProject
    while {$NewName==""} {
        FaireLire "Please give the name of the project to create"
        set NewName [Entre "MMMM"]
        if {[regexp -nocase {^[a-z][a-z0-9_]+$} $NewName]} { break }
        FaireLire "Please give a better name"
        set NewName ""
    }
    if {$Prefixe==""} {
        FaireLire "please give a prefix"
        set Prefixe [Entre $NewName]
    }
    while {[FileAbsent $FichierStart] && $FichierStart!="UseNucAliTfa"} {
        FaireLire "Please help me to find knownCanonical.exonNuc.fa"
        set FichierStart [ButineArborescence "All" "."]
        if {$FichierStart==""} {
            if {[OuiOuNon "I'll stop"]} { exit }
        }
    }

    set IndexReference 0 ; #rR for hg38
    # Default TaxId matching the default reference (hg38 -> 9606 below).
    # Without it ReferenceTaxId was undefined when Reference is empty and
    # the call to CreeFichierMiniConfig failed.
    set ReferenceTaxId 9606
    if {$Reference!=""} {
        set IndexReference [lsearch -exact [WantedOrganisms] $Reference]
        if {$IndexReference<0} {
            FaireLire "I cannot find $Reference in [WantedOrganisms]"
            exit
        }
        set ReferenceTaxId 99999999
        if {$Reference=="hg38"} { set ReferenceTaxId 9606 }
        if {$Reference=="mm10"} { set ReferenceTaxId 10090 }
        if {$ReferenceTaxId=="99999999"} {
            FaireLire "Please give the TaxId of $Reference"
            set ReferenceTaxId [Entre]
        }
    }

    set RepGstock "/gstock/$NewName"
    file mkdir $RepGstock
    set Link "/genomics/link/$NewName"
    if {[file exists $Link]} {
        if { ! [OuiOuNon "$Link already exists. Can I use it ?"]} { exit }
    } else {
        exec ln -s $RepGstock $Link
    }

    if {1} {
        file mkdir "$Link/nucalitfa"
        if {$FichierStart!="UseNucAliTfa"} {
            CreateNucAliTfa $FichierStart "$Link/nucalitfa" $Prefixe
        }
        CreateNucAndProtTfaAndMsfFromNucAliTfaPourTous $Link $Prefixe $IndexReference
        NousAllonsAuBoulot $Link
        exec ln -s msfNuc3 msf
        OnRevientDuBoulot
        file mkdir "$Link/beton"
        file mkdir "$Link/fiches"
        set FNum "5.5d"
        set Premier 1
        set DernierPAB [lindex [lsort [glob $Link/nuctfa/$Prefixe*]] end]
        # The file count is only a fallback: the trailing number of the last
        # file name overrides it when the regexp matches.
        set Dernier [llength [glob $Link/nuctfa/$Prefixe*]]
        if { ! [regexp {[^0-9]([0-9]+)$} $DernierPAB Match Dernier]} { FaireLire "Wrong list of PABs" }
        CreeBornesDesPABsTroisGradins $Premier $Dernier $Prefixe "" "" $FNum $Link
        CreeFichierMiniConfig $Prefixe $FNum "Collection of mrna" "" "" $ReferenceTaxId $Link
        AppendAuFichier "$Link/beton/miniconfig" "OnTraiteLike CircularCode"
        set Message "You can now run \nsetgscoperr $NewName\ngscope CompleteMAMAsProject\n"
        FaireLire $Message
        Espionne $Message
    }
    return $Message
}

# CompleteMAMAsProject
#   Second step after CreateMAMAsProject, run inside the new project: links
#   the blastp database, writes the blast parameters, then runs the blast,
#   annotation and statistics steps listed below.
proc CompleteMAMAsProject {} {
    #rR to run after CreateMAMAsProject NewName /home/ripp/circo/knownCanonical.exonNuc.fa NewPrefixe
    #rR setgscoperr NewNAME
    #rR gscope puts CompleteMAMAsProject
    file mkdir "[RepertoireDuGenome]/banques"
    NousAllonsAuBoulot "[RepertoireDuGenome]/banques"
    # Best effort: the link may already exist
    catch {exec ln -s /genomics/link/EHomsa/banques/AllProttfa blastpdatabase}
    OnRevientDuBoulot
    file mkdir "[RepertoireDuGenome]/blastp"
    Sauve "-v 10\n-b 10\n-F F\n-e 1.e-10" dans "[RepertoireDuGenome]/blastp/BlastParameters.txt"
    #return "Please run BlastPPourTous"
    BlastPPourTous
    InformeEHomsaReferencePourTous
    InformeChromoLocPourTous
    CompleteMAMAsStatistics
}

# CompleteMAMAsStatisticsForAll Start Stop Reverse
#   Runs CompleteMAMAsStatistics (without confirmation) for the CirCode
#   indices Start..Stop of [CirCode ListOf Index] (defaults 0 and end),
#   in reverse order when Reverse equals "Reverse" (case-insensitive).
proc CompleteMAMAsStatisticsForAll {{Start ""} {Stop ""} {Reverse "" }} {
    global CodonMatrix
    set LesRandom [CirCode ListOf Index]
    if {$Start==""} { set Start 0 }
    if {$Stop ==""} { set Stop end }
    set LesRandom [lrange $LesRandom $Start $Stop]
    if {[string equal -nocase "Reverse" $Reverse]} { set LesRandom [lreverse $LesRandom] }
    foreach Random $LesRandom {
        #if {0 && [file exists [SumOfPairsDir $Random]]} { Espionne [SumOfPairsDir $Random] already exists. I skip. ; continue }
        CompleteMAMAsStatistics $Random "Force"
    }
    return
}

proc CompleteMAMAsStatistics {{Random ""} {Ask ""}} {
    JeMeSignale
    if {$Ask==""} { set Ask "Ask" }
    set Ask [string equal -nocase $Ask "Ask"]
    if {$Random!=""} {
        CirCode DefaultCirCode $Random
        Espionne [CirCode DefaultCirCode] [CirCode]
        if { $Ask && !
[OuiOuNon "Are you shure you want to run CompleteMAMAsStatistics with CirCode [CirCode DefaultCirCode]\n[CirCode]"]} { return }
    }
    #rR This MUST be done !!! Otherwize we cumulate
    global CodonMatrix
    global CodonMatrixHuman
    global IsX0
    if {[info exists CodonMatrix]} { unset CodonMatrix }
    if {[info exists CodonMatrixHuman]} { unset CodonMatrixHuman }
    if {[info exists IsX0]} { unset IsX0 }
    #CreateMacsimsWithMotifsX
    #CalculSumOfPairsForCodonsAndCodonMatrix
    #return
    set NbNew [CalculSumOfPairsForBases]
    set Modified [StatistiquesXMotifs $NbNew]
    CreateRplots
    #if {[OnTraite JoyScCDS]} { return }
    CalculSumOfPairsForCodonsAndCodonMatrix
    StatisticsForCodons
}

# StatisticsForCodons
#   Placeholder: does nothing yet.
proc StatisticsForCodons {} {
    #rR To do !!!
}

# CircoDir
#   Returns the fixed working directory /home/ripp/circo.
proc CircoDir {} {
    set Rep "/home/ripp/circo"
    return $Rep
}

# WantedOrganism Org
#   Returns 1 when Org is one of [WantedOrganisms], else 0.
proc WantedOrganism {{Org ""}} {
    # -exact: organism names are compared literally, not as glob patterns
    return [expr {[lsearch -exact [WantedOrganisms] $Org]>=0}]
}

# WantedOrganisms
#   Returns the organisms to keep, depending on the project being treated.
proc WantedOrganisms {} {
    if {[OnTraite MAMA Like]} { return [list hg38 tupBel1 mm10 canFam3] }
    if {[OnTraite MUMU Like]} { return [list hg38 mm10] }
    if {[OnTraite JoyScCDS]} { return [JoyGenome ListOf JoyCDS] }
    set LesOrgs [list hg38 mm10]
    #set LesOrgs [list hg38 tupBel1 mm10 canFam3]
    #set LesOrgs [list hg38 panTro4 panPan1 gorGor3 ponAbe2 nomLeu3 rheMac3 macFas5 papAnu2 chlSab2 nasLar1 rhiRox1 calJac3 saiBol1 tarSyr2 micMur1 otoGar3 tupBel1 mm10 canFam3]
    return $LesOrgs
}

# CreateNucAliTfa Fichier Rep Prefixe
#   Reads a per-exon multi-organism alignment file and writes, for each
#   access, one aligned nucleotide TFA file <Rep>/<Prefixe>NNNNN holding the
#   concatenated exons of every wanted organism.
#   Input layout (one block per exon, blocks separated by empty lines):
#       header line  ">Access_Org_IemeExon_NbExons Long X Y Localisation"
#       sequence line
#   Gap boundaries that do not fall on a codon limit are padded with "n".
#   Returns a short summary text.
proc CreateNucAliTfa {Fichier Rep Prefixe} {
    #set Fichier "$Rep/48.fa"
    #set Fichier "$Rep/20000.fa"
    set LesAccess {}
    # Initialised so an input without any wanted organism does not fail below
    set LesOrgs {}
    set PreviousIsEmpty 0
    set LesLignesExon {}
    # A trailing empty line is added so the last exon block is flushed
    foreach Ligne [concat [LesLignesDuFichier $Fichier] [list ""]] {
        #rR Access1 Exon1 Org1 Entete
        #rR Access1 Exon1 Org1 Seq
        #rR Access1 Exon1 Org2 Entete
        #rR Access1 Exon1 Org2 Seq
        #rR empty line
        #rR Access1 Exon2 Org1 Entete
        #rR Access1 Exon2 Org1 Seq
        #rR Access1 Exon2 Org2 Entete
        #rR Access1 Exon2 Org2 Seq
        #rR empty line
        #rR Access2 Exon1 Org1 Entete
        #rR Access2 Exon1 Org1 Seq
        #rR Access2 Exon1 Org2 Entete
        #rR Access2 Exon1 Org2 Seq
        #rR empty line
        #rR Access2 Exon2 Org1 Entete
        #rR Access2 Exon2 Org1 Seq
        #rR Access2 Exon2 Org2 Entete
        #rR Access2 Exon2 Org2 Seq
        #rR empty line
        #rR Access3 ... etc.
        #Espionne = $Ligne =
        #Espionne $PreviousIsEmpty
        if {$Ligne!=""} {
            set PreviousIsEmpty 0
            lappend LesLignesExon $Ligne
        } else {
            #rR Ligne is empty
            if {$PreviousIsEmpty} { continue }
            foreach {Entete SeqExon} $LesLignesExon {
                set Localisation ""
                lassign [split $Entete " "] AOIN Long X Y Localisation
                lassign [split $AOIN "_"] Access Org IemeExon NbExons
                if { ! [WantedOrganism $Org] } { continue }
                regsub ">" $Access "" Access
                set NbExonsDe($Access) $NbExons
                if { ! [info exists DejaVuAccess($Access)]} { set DejaVuAccess($Access) 1 ; lappend LesAccess $Access }
                if { ! [info exists SeqDe($Access,$Org)]} { set SeqDe($Access,$Org) "" }
                append SeqDe($Access,$Org) $SeqExon
                lappend LesInfosDe($Access,$Org) $Localisation
                if { ! [info exists DejaPrisCetOrg($Org)]} { set DejaPrisCetOrg($Org) 1 ; lappend LesOrgs $Org }
            }
            set LesLignesExon {}
            set PreviousIsEmpty 1
        }
    }
    #rR everything has been read and stored
    set N 0
    # Defined even when no access was found, so the final message can be built
    set NomAli ""
    foreach Access $LesAccess {
        incr N
        set NomAli [format "%s%5.5d" $Prefixe $N]
        set FicAli "$Rep/$NomAli"
        set LesES {}
        if {[info exists DejaVuLOrg]} { unset DejaVuLOrg }
        foreach Org $LesOrgs {
            if {[info exists DejaVuLOrg($Org)]} { break }
            set DejaVuLOrg($Org) 1
            # NOTE(review): assumes every access has a sequence for every
            # organism seen in the file; a missing pair raises an error here.
            set Seq $SeqDe($Access,$Org)
            set NbExons $NbExonsDe($Access)
            set Entete ">${Access}_$Org $NbExons exons localisation : [join $LesInfosDe($Access,$Org) ,]"
            lappend LesES $Entete $Seq
        }
        set LesTFAsAlignes {}
        foreach {Entete Seq} $LesES {
            # Indices are taken in "-$Seq-", i.e. shifted by one relative to Seq:
            # D0 (a gap followed by a base) is the Seq index of that first base,
            # F0 (a base followed by a gap) is the Seq index of the gap after it.
            set LesDebDeSequence [regexp -all -indices -inline {\-[^-]} "-$Seq-"]
            set LesFinDeSequence [regexp -all -indices -inline {[^-]-}  "-$Seq-"]
            foreach DS $LesDebDeSequence {
                lassign $DS D0 D1
                set Mo [expr {$D0%3}]
                if {$Mo==0} { continue }
                # Segment starts inside a codon: pad the gap before it with n
                if {$Mo==1} { set Seq [string replace $Seq $D0-1 $D0-1 "n"] }
                if {$Mo==2} { set Seq [string replace $Seq $D0-2 $D0-1 "nn"] }
            }
            foreach FS $LesFinDeSequence {
                lassign $FS F0 F1
                set Mo [expr {$F0%3}]
                if {$Mo==0} { continue }
                # Segment ends inside a codon: pad the gap after it with n
                if {$Mo==1} { set Seq [string replace $Seq $F0 $F0+1 "nn"] }
                if {$Mo==2} { set Seq [string replace $Seq $F0 $F0 "n"] }
            }
            set TFA [SequenceFormatTFA $Seq $Entete nucbrut]
            lappend LesTFAsAlignes $TFA
        }
        set FA [SauveLesLignes $LesTFAsAlignes dans $FicAli]
        Espionne $FA
    }
    Espionne $LesAccess
    return "Ok, I created $N $Prefixe ... up to $NomAli"
}

# LongueurTotale Org
#   Compares, for every PAB, the ungapped nucleotide sequence of Org from
#   MAMAsSeqOld and MAMAsSeq. Returns "TotalOld TotalNew NbN" where NbN counts
#   the sequences that differ only by trailing "n"; other differences are
#   printed with Espionne.
proc LongueurTotale {Org} {
    set TO 0
    set TN 0
    set NbN 0
    foreach Nom [ListeDesPABs] {
        set SeqO [MAMAsSeqOld $Nom $Org nuc nogap]
        set SeqN [MAMAsSeq $Nom $Org nuc nogap]
        set LO [string length $SeqO]
        set LN [string length $SeqN]
        if {$SeqO ne $SeqN} {
            set SeqNsansN [string trimright $SeqN "n"]
            if {$SeqO eq $SeqNsansN} {
                incr NbN
            } else {
                Espionne "$SeqO\n$SeqNsansN\n$SeqN"
            }
        }
        incr TO $LO
        incr TN $LN
    }
    return "$TO $TN $NbN"
}

# CompareX
#   Prints the lines that differ between fiches/OLD/RatioXMotifspanTro4 and
#   fiches/RatioXMotifspanTro4, compared line by line.
proc CompareX {} {
    set FAv [RepertoireDuGenome]/fiches/OLD/RatioXMotifspanTro4
    set FAp [RepertoireDuGenome]/fiches/RatioXMotifspanTro4
    foreach Avant [LesLignesDuFichier $FAv] Apres [LesLignesDuFichier $FAp] {
        if {$Avant==$Apres} { continue }
        # NOTE(review): read as an argument-less Espionne (separator) followed
        # by the two lines; confirm against the unflattened source.
        Espionne
        Espionne "$Avant\n$Apres"
    }
}

proc InformeChromoLocPourTous {{Qui ""} {Quoi ""}} {
    foreach Nom [ListeDesPABs] {
        set Entete [EnteteDuFichierTFA [GscopeFile $Nom nuctfa]]
        lassign [split $Entete " "] Access NbExons ExEx Lolo DD ChromoLocs
        if { ! [regexp {>([^_]+)_(.+)$} $Access Match Access Org]} { FaireLire "Wrong access $Entete" }
        set MinLoc 0
        set MaxLoc 0
        foreach Loc [split $ChromoLocs ","] {
            if { !
[regexp {^([^\:]+)\:([0-9]+)\-([0-9]+)([\-\+])$} $Loc Match Chr Deb Fin Sens] } { FaireLire "Wrong loc =$Loc=" }
            if {$MinLoc==0} { set MinLoc $Deb }
            set MaxLoc $Fin
        }
        InformeSansDemander $Nom "=Access: $Access\n=Chromo: $Chr\n=NbExons: $NbExons\n=ChromoSens: $Sens\n=ChromoCover: $MinLoc $MaxLoc\n=ChromoLocalisation: $ChromoLocs"
    }
    return ""
}

# ChromoCount
#   Prints, for each chromosome known to OnChro, its gene count and its name.
proc ChromoCount {} {
    foreach Chromosome [OnChro ListOf Chr] {
        set GeneCount [OnChro $Chromosome NbGenes]
        Espionne [format "%6d %s" $GeneCount $Chromosome]
    }
}

# OnChro Qui Quoi
#   Lazily filled lookup (global array OnChro) built from the "Chromo:" and
#   "ChromoCover:" infos of every PAB.
#     per gene       : Chr Start End Length PosRel
#     per chromosome : Min Max ListOfGenes NbGenes
#     global         : ListOf,Chr
#   Returns OnChro(Qui,Quoi), or "" when unknown once the array is loaded.
proc OnChro {{Qui ""} {Quoi ""}} {
    global OnChro
    if {[info exists OnChro($Qui,$Quoi)]} { return $OnChro($Qui,$Quoi) }
    if {[info exists OnChro("EstCharge")]} { return "" }
    set OnChro("EstCharge") 1

    set LesPABs [lrange [ListeDesPABs] 0 end]
    foreach Gene $LesPABs {
        set Chromo [ExtraitInfo $Gene "Chromo:"]
        lappend LesChr $Chromo
        scan [ExtraitInfo $Gene "ChromoCover:"] "%d %d" Mi Ma
        set OnChro($Gene,Chr)    $Chromo
        set OnChro($Gene,Start)  $Mi
        set OnChro($Gene,End)    $Ma
        set OnChro($Gene,Length) [expr {$Ma-$Mi}]
        # The first gene of a chromosome seeds both bounds with its start
        if { ! [info exists OnChro($Chromo,Min)]} { set OnChro($Chromo,Min) $Mi }
        if { ! [info exists OnChro($Chromo,Max)]} { set OnChro($Chromo,Max) $Mi }
        set OnChro($Chromo,Min) [Mini $OnChro($Chromo,Min) $Mi]
        set OnChro($Chromo,Max) [Maxi $OnChro($Chromo,Max) $Ma]
        lappend OnChro($Chromo,ListOfGenes) $Gene
    }

    set LesChr [lsort -unique $LesChr]
    set OnChro(ListOf,Chr) $LesChr
    foreach Chromo $LesChr {
        set Bas  $OnChro($Chromo,Min)
        set Haut $OnChro($Chromo,Max)
        foreach Gene $OnChro($Chromo,ListOfGenes) {
            # Offset from the chromosome minimum, as a percentage of its maximum
            set OnChro($Gene,PosRel) [expr {(100.*($OnChro($Gene,Start)-$Bas))/$Haut}]
        }
        set OnChro($Chromo,NbGenes) [llength $OnChro($Chromo,ListOfGenes)]
    }
    return [OnChro $Qui $Quoi]
}

# SumOfPairsForCodonsAndCodonMatrix Pilier Frame
#   Compares every pair of codons of the column Pilier (case-insensitive),
#   ignoring pairs where a codon contains "-" or "N".
#   Accumulates into the global CodonMatrix(Frame,C1,C2), kept symmetric, and
#   into the counters CodonMatrix(Frame,COUNT,EQUAL|SUBSTITUTION).
#   Returns "NbOfIdenticalPairs NbOfCodonsInPilier".
proc SumOfPairsForCodonsAndCodonMatrix {Pilier Frame} {
    global CodonMatrix

    # Upper-case each codon once instead of once per pair
    set Codons {}
    foreach Brut $Pilier { lappend Codons [string toupper $Brut] }
    set Taille [llength $Codons]

    set SumPairs 0
    for {set i 0} {$i<$Taille} {incr i} {
        set Codon1 [lindex $Codons $i]
        for {set j [expr {$i+1}]} {$j<$Taille} {incr j} {
            set Codon2 [lindex $Codons $j]
            if {[regexp {[\-N]} $Codon1$Codon2]} { continue }
            if {$Codon1 eq $Codon2} {
                incr SumPairs 1
                if { ! [info exists CodonMatrix($Frame,$Codon1,$Codon1)] } { set CodonMatrix($Frame,$Codon1,$Codon1) 0 }
                incr CodonMatrix($Frame,$Codon1,$Codon1)
                incr CodonMatrix($Frame,COUNT,EQUAL)
                continue
            }
            # Substitution: count it under the sorted pair, then mirror it
            lassign [lsort [list $Codon1 $Codon2]] C1 C2
            if { ! [info exists CodonMatrix($Frame,$C1,$C2)] } { set CodonMatrix($Frame,$C1,$C2) 0 }
            incr CodonMatrix($Frame,$C1,$C2)
            incr CodonMatrix($Frame,COUNT,SUBSTITUTION)
            set CodonMatrix($Frame,$C2,$C1) $CodonMatrix($Frame,$C1,$C2)
        }
    }
    # One element is counted per codon of the column
    return "$SumPairs $Taille"
}

# SPCandCMforReference Pilier Frame Caller
#   Same idea as SumOfPairsForCodonsAndCodonMatrix, but only the first codon
#   of Pilier (the reference) is compared with each of the others, and the
#   global CodonMatrixHuman(Frame,Reference,Other) is NOT symmetric.
#   Caller is not used.
#   Returns "NbOfIdenticalPairs NbOfElements" (NbOfElements is at least 1).
proc SPCandCMforReference {Pilier Frame Caller} {
    #JeMeSignale
    global CodonMatrixHuman
    set Taille [llength $Pilier]
    set Reference [string toupper [lindex $Pilier 0]]
    set SumPairs 0
    #rR careful: the reference counts as one element, then one per compared codon
    set NbElements 1
    set j 1
    while {$j<$Taille} {
        incr NbElements
        set Autre [string toupper [lindex $Pilier $j]]
        incr j
        if {[regexp {[\-N]} $Reference$Autre]} { continue }
        if {$Reference eq $Autre} {
            incr SumPairs 1
            if { ! [info exists CodonMatrixHuman($Frame,$Reference,$Reference)] } { set CodonMatrixHuman($Frame,$Reference,$Reference) 0 }
            incr CodonMatrixHuman($Frame,$Reference,$Reference)
            incr CodonMatrixHuman($Frame,COUNT,EQUAL)
        } else {
            if { ! [info exists CodonMatrixHuman($Frame,$Reference,$Autre)] } { set CodonMatrixHuman($Frame,$Reference,$Autre) 0 }
            incr CodonMatrixHuman($Frame,$Reference,$Autre)
            incr CodonMatrixHuman($Frame,COUNT,SUBSTITUTION)
        }
    }
    return "$SumPairs $NbElements"
}

proc OLD_SPCandCMforReference {Pilier Frame Caller} {
    #JeMeSignale
    global CodonMatrixHuman
    set SumPairs 0
    set NbElements 0
    set Taille [llength $Pilier]
    for {set i 0} {$i<1} {incr i} { ; #rR we use only the first line !!!!!!!!!!!!!!
        incr NbElements
        for {set j [expr {$i+1}]} {$j<$Taille} {incr j} {
            #rR carefull we incr here also
            incr NbElements
            set Codon1 [string toupper [lindex $Pilier $i]]
            set Codon2 [string toupper [lindex $Pilier $j]]
            if { ! [regexp {[\-N]} $Codon1$Codon2]} {
                if {$Codon1 eq $Codon2} {
                    incr SumPairs 1
                    if { !
[info exists CodonMatrixHuman($Frame,$Codon1,$Codon1)] } { set CodonMatrixHuman($Frame,$Codon1,$Codon1) 0 }
                    incr CodonMatrixHuman($Frame,$Codon1,$Codon1)
                    incr CodonMatrixHuman($Frame,COUNT,EQUAL)
                } else {
                    lassign [lsort [list $Codon1 $Codon2]] C1 C2
                    if { ! [info exists CodonMatrixHuman($Frame,$C1,$C2)] } { set CodonMatrixHuman($Frame,$C1,$C2) 0 }
                    incr CodonMatrixHuman($Frame,$C1,$C2)
                    incr CodonMatrixHuman($Frame,COUNT,SUBSTITUTION)
                    set CodonMatrixHuman($Frame,$C2,$C1) $CodonMatrixHuman($Frame,$C1,$C2)
                }
            }
        }
    }
    return "$SumPairs $NbElements"
}

# CalculSumOfPairsForCodonsAndCodonMatrix Liste Save
#   For each Nom of Liste (default: all PABs) builds, codon column by codon
#   column, the "piliers" of codons and writes one tab-separated line per
#   column to <SumOfPairsForCodonsDir>/<Nom> and, for the reference-based
#   variant, to <SumOfPairsForCodonsHumanDir>/<Nom>.
#   Columns of each line:
#     Position, then (NbMotifs SumMotifs RatioMotifs) for frames 0, 1 and 2,
#     NbTotalMotifs, NbLines, SumPairs, RatioSumPairs.
#   The global arrays CodonMatrix and CodonMatrixHuman are reset, cumulated
#   over all Nom and saved at the end.
#   Save: "" saves the per-Nom files; "AddToCodonMatrix" additionally reloads
#   the saved CodonMatrix first so the counts are added to it.
proc CalculSumOfPairsForCodonsAndCodonMatrix {{Liste ""} {Save ""}} {
    #rR Here we calculate 2 times several things ... :
    #rR   1/ for the original SumOfPairs
    #rR   2/ and the one starting from the ReferenceOrg (ie hg38 or JoyScCDS) and comparing only changes to it.
    #rR We create a file for each Nom
    #rR and calculate CodonMatrices : CodonMatrix.txt and CodonMatrixHuman.txt ...
    #rR both can be used (but not simultaneously) by the proc CodonMatrix depending on the lines commented or not
    #rR    #array set CodonMatrix [ContenuDuFichier [CodonMatrixFile]]
    #rR    array set CodonMatrix [ContenuDuFichier [CodonMatrixFileHuman]]
    #rR MAYBE we have to write two procs : CodonMatrix and CodonMatrixHuman !!!
    #rR the array CodonMatrix is cumulated in the proc SumOfPairsForCodonsAndCodonMatrix
    #rR the array CodonMatrixHuman is cumulated in the proc SPCandCMforReference
    #rR these procs receive as arguments the Pilier PilierMotif or PilierHuman and PilierMotifHuman respectively
    #rR ATTENTION ATTENTION since 2018/03/07 CodonMatrixHuman is NO MORE a symmetric matrix
    #rR the diagonal is the count of nb of EQUALity and other the count of mutations (both starting from ReferenceOrg)
    global CodonMatrix
    global CodonMatrixHuman
    global IsX0
    if {[info exists CodonMatrix]} { unset CodonMatrix }
    if {[info exists CodonMatrixHuman]} { unset CodonMatrixHuman }
    if {[info exists IsX0]} { unset IsX0 }
    set ReferenceOrg [ReferenceOrg]
    set IndexReferenceOrg [lsearch -exact [WantedOrganisms] $ReferenceOrg]
    #rR Caution : this must be run in one shot because we cumulate CodonMatrix for all when doing SumOfPairs
    #Position NbMotF0 SumMotF0 RatioSumMot/Div NbMotF1 SumMotF1 RatioSumMotF1/Div NbMotF2 SumMotF2 RatioSumMotF2/Div NbTotalMot NbLinesParticipant SumPairs RatioSumPairs/Div
    if {$Save=="AddToCodonMatrix"} {
        set Save 1
        array set CodonMatrix [ContenuDuFichier [CodonMatrixFile]]
    }
    if {$Save==""} { set Save 1 }
    set RepSPC [SumOfPairsForCodonsDir] ; #rR this depends on CirCode CC : CC_SumOfPairsForCodons
    file mkdir $RepSPC
    set RepSPCHuman [SumOfPairsForCodonsHumanDir] ; #rR this depends on CirCode CC : CC_SumOfPairsForCodonsHuman
    file mkdir $RepSPCHuman
    if {$Liste==""} { set Liste [lrange [ListeDesPABs] 0 end] }
    set SpyEvery 20
    set SpyCount -1
    foreach Nom $Liste {
        if { 0 && [file exists "$RepSPC/$Nom"]} { continue }
        if { 0 && [PasTouche $Nom]} { continue }
        # Progress trace every SpyEvery names
        if {([incr SpyCount]%$SpyEvery)==0} { Espionne [CirCode "DefaultCirCode"] $Nom }
        set LesLignesSumOfPairs {}
        set LesLignesSumOfPairsHuman {}
        set LongueurSequence 0
        if {[info exists LesDFDu] } { unset LesDFDu } ;#rR start and end from ...
        if {[info exists SequenceDu] } { unset SequenceDu }
        if {[info exists LesPositionsFrame] } { unset LesPositionsFrame }
        if {[info exists LesPositionsMotifs] } { unset LesPositionsMotifs }
        if {[info exists LesPositionsSansTirets] } { unset LesPositionsSansTirets }
        if {[info exists InMotif] } { unset InMotif }
        #rR unset all Pilier
        if {[info exists Pilier] } { unset Pilier } ; #rR See below more explanations
        if {[info exists PilierMotif] } { unset PilierMotif }
        if {[info exists PilierHuman] } { unset PilierHuman }
        if {[info exists PilierMotifHuman] } { unset PilierMotifHuman }
        set LesAccess [MAMAsSeq $Nom listof org]
        if {$LesAccess=={}} { continue } ; #rR
        foreach Access $LesAccess {
            set SequenceDu($Access) [string toupper [MAMAsSeq $Nom $Access nuc gap]]
        }
        # Locate the motif codons of each organism and frame, in alignment
        # (gapped) coordinates
        foreach Access $LesAccess {
            set SequenceNonGap [string toupper [MAMAsSeq $Nom $Access nuc nogap]]
            foreach Frame [list 0 1 2] {
                set LesPositionsMotifs($Frame,$Access) {}
                set LesPositionsSansTirets($Frame,$Access) {}
                set XMotifsSequence [XMotifs $SequenceNonGap $Frame]
                set LesDFDu($Frame,$Access) {}
                foreach {K V} $XMotifsSequence {
                    foreach DF $V {
                        lappend LesDFDu($Frame,$Access) $DF
                    }
                }
                if {$LesDFDu($Frame,$Access)=={}} { continue }
                set LesDFDu($Frame,$Access) [lsort -integer -index 0 $LesDFDu($Frame,$Access)]
                foreach DF $LesDFDu($Frame,$Access) {
                    lassign $DF D F
                    incr F 2
                    set DG [PositionsLocalesVersGlobales $SequenceDu($Access) $D]
                    set FG [PositionsLocalesVersGlobales $SequenceDu($Access) $F]
                    for {set i $DG} {$i<=$FG} {incr i 3} {
                        lappend LesPositionsMotifs($Frame,$Access) $i
                    }
                }
                foreach P $LesPositionsMotifs($Frame,$Access) {
                    set Codon [string range $SequenceDu($Access) $P $P+2]
                    if { ! [regexp {\-} $Codon]} {
                        lappend LesPositionsSansTirets($Frame,$Access) $P
                        set InMotif($Frame,$Access,$P) 1
                    }
                }
            }
        }
        foreach {K V} [array get InMotif] {
            set AllInMotif($Nom,$K) $V
        }
        foreach Frame [list 0 1 2] {
            foreach Access $LesAccess {
                LConcat LesPositionsFrame($Frame) $LesPositionsSansTirets($Frame,$Access)
            }
            set LesPositionsFrame($Frame) [lsort -integer -increasing $LesPositionsFrame($Frame)]
        }
        # Access still holds the last organism of the loops above
        set LongueurSequence [string length $SequenceDu($Access)]
        for {set i 0} {$i<=$LongueurSequence} {incr i 3} {
            #rR Reset of all the piliers
            set Pilier($i) {}             ; #rR all non-empty codons of a column
            set PilierMotif(0,$i) {}      ; #rR all codons that are in a motif in frame 0
            set PilierMotif(1,$i) {}      ; #rR all codons that are in a motif in frame 1
            set PilierMotif(2,$i) {}      ; #rR all codons that are in a motif in frame 2
            set PilierHuman($i) {}        ; #rR all codons of a column where Human is present
            set PilierMotifHuman(0,$i) {} ; #rR all codons of a column where Human is present and is in a motif in frame 0
            set PilierMotifHuman(1,$i) {} ; #rR all codons of a column where Human is present and is in a motif in frame 1
            set PilierMotifHuman(2,$i) {} ; #rR all codons of a column where Human is present and is in a motif in frame 2
            set HumanIsThere 0
            if {[lindex $LesAccess $IndexReferenceOrg]==$ReferenceOrg} { set HumanIsThere 1 } ; #rR problem ???
            if {$HumanIsThere} {
                #rR First we initialise with the ReferenceOrg
                set CodonOfFrame(0) [string range $SequenceDu($ReferenceOrg) $i $i+2]
                set CodonOfFrame(1) [string range $SequenceDu($ReferenceOrg) $i-2 $i]
                set CodonOfFrame(2) [string range $SequenceDu($ReferenceOrg) $i-1 $i+1]
                set Codon $CodonOfFrame(0)
                #rR 20180202 i changed to have exactly 3 bases
                if {[regexp {^[ATGC]{3}$} $Codon]} { lappend Pilier($i) $Codon }
                if {[regexp {^[ATGC]{3}$} $Codon]} { lappend PilierHuman($i) $Codon }
                foreach Frame [list 0 1 2] {
                    if {[info exists InMotif($Frame,$ReferenceOrg,$i)]} {
                        #Espionne $Frame $ReferenceOrg $i $CodonOfFrame($Frame) $PilierMotif($Frame,$i)
                        lappend PilierMotif($Frame,$i) $CodonOfFrame($Frame)
                    }
                }
                foreach Frame [list 0 1 2] {
                    #rR It's should work because ReferenceOrg is the first
                    if {[info exists InMotif($Frame,$ReferenceOrg,$i)] && [regexp {^[ATGC]{3}$} $CodonOfFrame($Frame)]} {
                        lappend PilierMotifHuman($Frame,$i) $CodonOfFrame($Frame)
                    }
                }
            }
            foreach Access $LesAccess {
                #rR Now we can cumulate but not for ReferenceOrg
                if {$Access==$ReferenceOrg} { continue }
                set CodonOfFrame(0) [string range $SequenceDu($Access) $i $i+2]
                set CodonOfFrame(1) [string range $SequenceDu($Access) $i-2 $i]
                set CodonOfFrame(2) [string range $SequenceDu($Access) $i-1 $i+1]
                set Codon $CodonOfFrame(0)
                if {[regexp {^[ATGC]{3}$} $Codon]} { lappend Pilier($i) $Codon }
                if {[regexp {^[ATGC]{3}$} $Codon]} { lappend PilierHuman($i) $Codon }
                foreach Frame [list 0 1 2] {
                    if {[info exists InMotif($Frame,$Access,$i)]} {
                        #Espionne $Frame $Access $i $CodonOfFrame($Frame) $PilierMotif($Frame,$i)
                        lappend PilierMotif($Frame,$i) $CodonOfFrame($Frame)
                    }
                }
                foreach Frame [list 0 1 2] {
                    #rR is not empty if ReferenceOrg was there.
                    if {$PilierMotifHuman($Frame,$i)!={}} {
                        if {[regexp {^[ATGC]{3}$} $CodonOfFrame($Frame)]} {
                            lappend PilierMotifHuman($Frame,$i) $CodonOfFrame($Frame)
                        }
                    }
                }
            }
        }
        #rR All Pilier... are now calculated and the ReferenceOrg is the first in each Pilier...
        for {set i 0} {$i<=$LongueurSequence} {incr i 3} {
            #rR 2018/03/09 10:00 I remove the test for frame 1 and 2 ... We have to run again 00 01 02 03 04 05
            if {( ! [info exists Pilier($i)] || [llength $Pilier($i)]<2) || ([OnTraite SubMAMA Like] && [info exists InMotif(0,$ReferenceOrg,$i)]) } {
                set NbLignes [llength $Pilier($i)]
                set SommeDesPairs 0
                set RatioSommeNbLignes 0
            } else {
                # Pseudo-frame 3 collects the whole-column counts
                lassign [SumOfPairsForCodonsAndCodonMatrix $Pilier($i) 3] SommeDesPairs NbLignes
                set Diviseur [expr {($NbLignes*($NbLignes-1))/2}]
                set RatioSommeNbLignes [expr {1.0*$SommeDesPairs/$Diviseur}]
            }
            foreach Frame [list 0 1 2] {
                if { ! [info exists PilierMotif($Frame,$i)] || [llength $PilierMotif($Frame,$i)]<2} {
                    set NbrOfMotifs($Frame) 0
                    set SumOfMotifs($Frame) 0
                    set RatioMotifs($Frame) 0
                } else {
                    #Espionne $Frame $i $PilierMotif($Frame,$i)
                    lassign [SumOfPairsForCodonsAndCodonMatrix $PilierMotif($Frame,$i) $Frame] SumOfMotifs($Frame) NbrOfMotifs($Frame)
                    set Diviseur [expr {($NbrOfMotifs($Frame)*($NbrOfMotifs($Frame)-1))/2}]
                    # The ratio of a frame uses the sum of THAT frame. The
                    # original always divided SumOfMotifs(0), unlike the
                    # reference-based branch below.
                    set RatioMotifs($Frame) [expr {1.0*$SumOfMotifs($Frame)/$Diviseur}]
                }
            }
            set LesColonnes {}
            lappend LesColonnes [expr {$i+1}]
            foreach Frame [list 0 1 2] {
                lappend LesColonnes $NbrOfMotifs($Frame)
                lappend LesColonnes $SumOfMotifs($Frame)
                lappend LesColonnes $RatioMotifs($Frame)
            }
            lappend LesColonnes [expr {$NbrOfMotifs(0)+$NbrOfMotifs(1)+$NbrOfMotifs(2)}]
            lappend LesColonnes $NbLignes
            lappend LesColonnes $SommeDesPairs
            lappend LesColonnes $RatioSommeNbLignes
            set LineSumPairs [join $LesColonnes "\t"]
            lappend LesLignesSumOfPairs $LineSumPairs

            #rR Human Human Human : same computation, relative to the ReferenceOrg
            #rR 2018/03/09 10:00 I remove the test for frame 1 and 2 ... We have to run again 00 01 02 03 04 05
            if {( ! [info exists PilierHuman($i)] || [llength $PilierHuman($i)]<2) || ([OnTraite SubMAMA Like] && [info exists InMotif(0,$ReferenceOrg,$i)]) } {
                set NbLignesHuman [llength $PilierHuman($i)]
                set SommeDesPairsHuman 0
                set RatioSommeNbLignesHuman 0
            } else {
                set SN [SPCandCMforReference $PilierHuman($i) 3 "AllFor$i"]
                lassign $SN SommeDesPairsHuman NbLignesHuman
                set Diviseur [expr {($NbLignesHuman*($NbLignesHuman-1))/2}]
                set RatioSommeNbLignesHuman [expr {1.0*$SommeDesPairsHuman /$Diviseur}]
            }
            foreach Frame [list 0 1 2] {
                if { ! [info exists PilierMotifHuman($Frame,$i)] || [llength $PilierMotifHuman($Frame,$i)]<2 } {
                    set NbrOfMotifsHuman($Frame) 0
                    set SumOfMotifsHuman($Frame) 0
                    set RatioMotifsHuman($Frame) 0
                } else {
                    set SN [SPCandCMforReference $PilierMotifHuman($Frame,$i) $Frame "MotifFor$i"]
                    lassign $SN SumOfMotifsHuman($Frame) NbrOfMotifsHuman($Frame)
                    set Diviseur [expr {($NbrOfMotifsHuman($Frame)*($NbrOfMotifsHuman($Frame)-1))/2}]
                    set RatioMotifsHuman($Frame) [expr {(1.0*$SumOfMotifsHuman($Frame))/$Diviseur}]
                }
            }
            set LesColonnes {}
            lappend LesColonnes [expr {$i+1}]
            foreach Frame [list 0 1 2] {
                lappend LesColonnes $NbrOfMotifsHuman($Frame)
                lappend LesColonnes $SumOfMotifsHuman($Frame)
                lappend LesColonnes $RatioMotifsHuman($Frame)
            }
            lappend LesColonnes [expr {$NbrOfMotifsHuman(0)+$NbrOfMotifsHuman(1)+$NbrOfMotifsHuman(2)}]
            lappend LesColonnes $NbLignesHuman
            lappend LesColonnes $SommeDesPairsHuman
            lappend LesColonnes $RatioSommeNbLignesHuman
            set LineSumPairsHuman [join $LesColonnes "\t"]
            lappend LesLignesSumOfPairsHuman $LineSumPairsHuman
        }
        if {$Save} { SauveLesLignes $LesLignesSumOfPairs dans "$RepSPC/$Nom" }
        if {$Save} { SauveLesLignes $LesLignesSumOfPairsHuman dans "$RepSPCHuman/$Nom" }
        #FinPasTouche $Nom
    }
    Sauve [array get CodonMatrix] in [CodonMatrixFile]
    Sauve [array get CodonMatrixHuman] in [CodonMatrixFileHuman]
    #Sauve [array get AllInMotif] in [AllInMotifFile]
}

proc AllInMotif
{{Qui ""} {Quoi ""}} {
    # AllInMotif Qui Quoi
    #   Lazily loads [AllInMotifFile] (keys "Nom,Frame,Access,Position") into
    #   the global array AllInMotif and derives from it:
    #     LISTOF,NOM                   sorted unique names
    #     <NOM>,LISTOFACCESS           sorted unique organisms of the name
    #     <NOM>,LISTOFFRAME            sorted unique frames of the name
    #     <NOM>-<Frame>,LISTOFPOS      "Position Access" pairs sorted by position
    #     <NOM>-<ACCESS>-<Frame>,LISTOFPOS   sorted positions
    #   (NOM and ACCESS are upper-cased). Returns AllInMotif(Qui,Quoi), or ""
    #   when unknown once the array is loaded.
    global AllInMotif
    if {[info exists AllInMotif($Qui,$Quoi)]} { return $AllInMotif($Qui,$Quoi) }
    if {[info exists AllInMotif("EstCharge")]} { return "" }
    array set AllInMotif [ContenuDuFichier [AllInMotifFile]]
    set Qui  [string toupper $Qui]
    set Quoi [string toupper $Quoi]

    # Snapshot taken before the derived keys are created, so they are never
    # mistaken for file entries. LISTOF,NOM then starts empty, so an empty
    # file does not break the sort below.
    set Contenu [array get AllInMotif]
    set AllInMotif(LISTOF,NOM) {}
    foreach {K V} $Contenu {
        lassign [split $K ","] Nom Frame Access Position
        #Espionne $K $Nom $Frame $Access $Position
        set NOM    [string toupper $Nom]
        set ACCESS [string toupper $Access]
        lappend AllInMotif(LISTOF,NOM) $Nom
        lappend AllInMotif($NOM,LISTOFACCESS) $Access
        lappend AllInMotif($NOM,LISTOFFRAME) $Frame
        lappend AllInMotif($NOM-$Frame,LISTOFPOS) "$Position $Access"
        lappend AllInMotif($NOM-$ACCESS-$Frame,LISTOFPOS) $Position
    }
    set AllInMotif(LISTOF,NOM) [lsort -unique $AllInMotif(LISTOF,NOM)]
    foreach Nom $AllInMotif(LISTOF,NOM) {
        # NOM must follow the loop variable. The original reused the value
        # left by the previous loop, so only one name was ever post-processed.
        set NOM [string toupper $Nom]
        set AllInMotif($NOM,LISTOFACCESS) [lsort -unique $AllInMotif($NOM,LISTOFACCESS)]
        set AllInMotif($NOM,LISTOFFRAME) [lsort -unique $AllInMotif($NOM,LISTOFFRAME)]
        foreach Access $AllInMotif($NOM,LISTOFACCESS) {
            set ACCESS [string toupper $Access]
            foreach Frame $AllInMotif($NOM,LISTOFFRAME) {
                # An organism need not have motifs in every frame of the name
                if { ! [info exists AllInMotif($NOM-$ACCESS-$Frame,LISTOFPOS)]} { continue }
                set AllInMotif($NOM-$ACCESS-$Frame,LISTOFPOS) [lsort -integer $AllInMotif($NOM-$ACCESS-$Frame,LISTOFPOS)]
            }
        }
        foreach Frame $AllInMotif($NOM,LISTOFFRAME) {
            #Espionne $NOM-$Frame,
            #Espionne $AllInMotif($NOM-$Frame,LISTOFPOS)
            set AllInMotif($NOM-$Frame,LISTOFPOS) [lsort -integer -index 0 $AllInMotif($NOM-$Frame,LISTOFPOS)]
            #Espionne $AllInMotif($NOM-$Frame,LISTOFPOS)
        }
    }
    set AllInMotif("EstCharge") 1
    return [AllInMotif $Qui $Quoi]
}

# AddFeatures Nom FichierXml ?CC? ?FeaturesDir ...?
#   Reads the raw feature file <FeaturesDir>/<Nom> of each features directory
#   (default: the X0, X1 and X2 motif directories of CirCode CC), saves the
#   alignment as XML into FichierXml, then exits the process.
proc AddFeatures {Nom FichierXml args} {
    #rR 20180329 I changed the order CC FeaturesDirs
    #rR Called by ordali only
    global TabSF
    #CreeLesPiliers
    #RunOrdali
    # "" makes the X?MotifsFeaturesDir helpers use the default CirCode.
    # CC was undefined before when no extra argument was given.
    set CC ""
    if {$args!={}} {
        set CC [lindex $args 0]
        set args [lrange $args 1 end]
    }
    set FeaturesDirs $args
    if {$FeaturesDirs==""} { set FeaturesDirs [list [X0MotifsFeaturesDir $CC] [X1MotifsFeaturesDir $CC] [X2MotifsFeaturesDir $CC]] }
    foreach FeaturesDir $FeaturesDirs {
        array unset TabSF "*,$FeaturesDir"
        ReadRawFeatureFile $FeaturesDir/$Nom
    }
    SauveLAlignement XML $FichierXml
    exit
}

# CreateMacsimsWithMotifsXForAll LesRandom
#   Runs CreateMacsimsWithMotifsX for each CirCode index of LesRandom
#   (default: all indices), skipping those locked by PasTouche or whose
#   MacsimXmlDir already exists. Returns the indices actually processed.
proc CreateMacsimsWithMotifsXForAll {{LesRandom ""}} {
    if {$LesRandom==""} { set LesRandom [CirCode ListOf Index] }
    #set LesRandom [list 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30]
    # Initialised so the return does not fail when every index is skipped
    set LesRandomFait {}
    foreach Random $LesRandom {
        Espionne $Random
        CirCode "DefaultCirCode" $Random
        set PatName "CreateMacsimsWithMotifsX_$Random"
        if {[PasTouche $PatName]} { continue }
        #rR the following line will have to be removed
        if {[file exists [MacsimXmlDir]]} { Espionne [MacsimXmlDir] already exists. I skip. ; FinPasTouche $PatName ; continue }
        CreateMacsimsWithMotifsX
        lappend LesRandomFait $Random
        FinPasTouche $PatName
    }
    return $LesRandomFait
}

proc CreateMacsimsWithMotifsX {{Liste ""} {NbCodons ""} {Cardinality ""}} {
    #rR 2019/10/02 j'ai rajouté NbCondos et Cardinality pour Johan
    #rR On change touts les noms de directories pour ces specific
    set NbCodons 3
    set Cardinality 1
    set Specific "$NbCodons$Cardinality"
    set MacsimXmlDir "[MacsimXmlDir]$Specific"
    set MacsimRsfDir "[MacsimRsfDir]$Specific"
    set Couleur(0) "yellow"
    set Couleur(1) "orange"
    set Couleur(2) "red"
    file mkdir $MacsimXmlDir
    file mkdir $MacsimRsfDir
    if {$Liste==""} { set Liste [lrange [ListeDesPABs] 0 end] }
    foreach Nom $Liste {
        Espionne $Nom
        set FichierMsf [GscopeFile $Nom msf]
        set FichierMacsimXml "$MacsimXmlDir/$Nom"
        if {[FileAbsent $FichierMsf] } { continue }
        if {[FileExists $FichierMacsimXml]} { continue }
        foreach Frame [list 0 1 2] {
            set Dir "[X${Frame}MotifsFeaturesDir]$Specific"
            set FeaturesDir($Frame) $Dir
            file mkdir $Dir
            set LesLignesFeatures($Frame) [list "FEATURE X${Frame}Motifs"]
        }
        set LesAccess {}
        if {[info exists LesDFDu] } { unset LesDFDu }
        if {[info exists SequenceDu]} { unset SequenceDu }
        if {[info exists KduDF] } { unset KduDF }
        set F [GscopeFile $Nom nucalitfa]
        foreach Ligne [LesLignesDuFichier $F] {
            if {[regexp {>} $Ligne]} {
                set Entete $Ligne
                lassign [split $Entete " "] Access
                regsub ">"
$Access "" Access lappend LesAccess $Access if { ! [info exists SequenceDu($Access)]} { set SequenceDu($Access) "" } } else { append SequenceDu($Access) $Ligne } } foreach Access $LesAccess { set SequenceNonGap "" regsub -all {\-} $SequenceDu($Access) "" SequenceNonGap foreach Frame [list 0 1 2] { set SequenceNonGapDeLaFrame [string range $SequenceNonGap $Frame end] #rR Rajout de NbCodon et Cardinality set XMotifsSequence [XMotifs $SequenceNonGapDeLaFrame 0 $NbCodons $Cardinality] set LesDFDu($Frame,$Access) {} foreach {K V} $XMotifsSequence { foreach DF $V { lappend LesDFDu($Frame,$Access) $DF set KduDF($Frame,$Access,$DF) $K } } } } set FeaturesDirs {} foreach Access $LesAccess { foreach Frame [list 0 1 2] { set LesDFDu($Frame,$Access) [lsort -integer -index 0 $LesDFDu($Frame,$Access)] foreach DF $LesDFDu($Frame,$Access) { lassign $DF D F incr F 2 incr D $Frame incr F $Frame incr D 1 incr F 1 regsub "," $KduDF($Frame,$Access,$DF) ";" KduDF($Frame,$Access,$DF) set LigneFeature "$Access;LOCAL;$D;$F;$Couleur($Frame);$KduDF($Frame,$Access,$DF)" lappend LesLignesFeatures($Frame) $LigneFeature } SauveLesLignes $LesLignesFeatures($Frame) dans "$FeaturesDir($Frame)/$Nom" lappend FeaturesDirs $FeaturesDir($Frame) } } set FeaturesDirs [lsort -unique $FeaturesDirs] set Commande "ordali $FichierMsf -batch 1 -exe AddFeatures $Nom $FichierMacsimXml [CirCode DefaultCirCode] $FeaturesDirs" Espionne $Commande if {[catch { exec {*}$Commande } Message]} { Warne "Something went wrong with \n$Commande\n I'll skip... 
Please fix the probleme and do it again later" } } } proc MAMAsSeq {{Nom ""} {Org ""} {NucOrProt ""} {Gap ""}} { set FicNucAliTfa [GscopeFile $Nom "nucalitfa"] global MAMAsSeq #rR MAMAsSeq MAMA00001 listof org ;# returns all organisms #rR MAMAsSeq MAMA00001 listof access ;# returns all access #rR MAMAsSeq MAMA00001 hg38 header ;# returns the header of the TFA file #rR MAMAsSeq MAMA00001 hg38 ;# returns sequence nuc of hg38 non gap #rR MAMAsSeq MAMA00001 hg38 nuc ;# returns sequence nuc of hg38 non gap (same as above) #rR MAMAsSeq MAMA00001 hg38 prot ;# returns sequence prot of hg38 non gap #rR MAMAsSeq MAMA00001 hg38 nuc gap ;# returns sequence nuc of hg38 with gaps #rR MAMAsSeq MAMA00001 hg38 prot gap ;# returns sequence prot of hg38 with gaps #rR MAMAsSeq MAMA00001 all ;# returns all sequences nuc non gap #rR MAMAsSeq MAMA00001 all nuc ;# returns all sequences nuc non gap #rR MAMAsSeq MAMA00001 all prot ;# returns all sequences prot non gap #rR MAMAsSeq MAMA00001 all nuc gap ;# returns all sequences nuc with gaps #rR MAMAsSeq MAMA00001 all prot gap ;# returns all sequences prot with gaps #rR MAMAsSeq MAMA00001 canFam3 ... returns what is asked for canFam3 instead of hg38 ! #rR Fin Help if {$Nom==""} { foreach Ligne [split [info body MAMAsSeq] "\n"] { if { ! 
[regexp "#rR" $Ligne]} { continue } if {[regexp "Fin Help" $Ligne]} { break } lappend LeHelp [string trim $Ligne] } return [join $LeHelp "\n"] } if {$Nom=="Unset"} { if {[info exists MAMAsSeq]} { unset MAMAsSeq } return "" } set Gap [string tolower $Gap] set NucOrProt [string tolower $NucOrProt] if {$Gap =="nogap"} { set Gap "" } if {$NucOrProt==""} { set NucOrProt "nuc" } if {$Org ==""} { set Org [ReferenceOrg] } if {[info exists MAMAsSeq($Nom,$Org,$NucOrProt,$Gap)]} { return $MAMAsSeq($Nom,$Org,$NucOrProt,$Gap) } if {[info exists MAMAsSeq($Nom,"EstCharge")]} { return "" } set MAMAsSeq($Nom,"EstCharge") 1 if {[FileAbsent $FicNucAliTfa]} { return "" } foreach {Access} [LaSequenceDuTFAs $FicNucAliTfa "LaListeDesAccess"] { set OrgaLu "" if {$OrgaLu==""} { regexp {(Joy[A-Z][a-z]CDS)[0-9]+} $Access Match OrgaLu } if {$OrgaLu==""} { regexp {_([^ ]+)$} $Access Match OrgaLu } if {$OrgaLu==""} { continue } set TFA [LaSequenceDuTFAs $FicNucAliTfa $Access] set Entete [EnteteDuTexteTFA $TFA] set SeqNucGap [QueLaSequenceDuTexteTFA $TFA] #if {[regexp rhiRox1 $Access]} { Espionne $SeqNucGap } regsub -all {\-} $SeqNucGap "" SeqNuc set MAMAsSeq($Nom,$OrgaLu,header,) $Entete set MAMAsSeq($Nom,$OrgaLu,nuc,gap) $SeqNucGap set MAMAsSeq($Nom,$OrgaLu,nuc,) $SeqNuc lappend MAMAsSeq($Nom,all,nuc,gap) $SeqNucGap lappend MAMAsSeq($Nom,all,nuc,) $SeqNuc lappend MAMAsSeq($Nom,listof,access,) $Access lappend MAMAsSeq($Nom,listof,org,) $OrgaLu } set FicProtAliTfa [GscopeFile $Nom "protalitfa"] if {[FileAbsent $FicProtAliTfa]} { return "" } foreach {Access} [LaSequenceDuTFAs $FicProtAliTfa "LaListeDesAccess"] { set OrgaLu "" if {$OrgaLu==""} { regexp {(Joy[A-Z][a-z]CDS)[0-9]+} $Access Match OrgaLu } if {$OrgaLu==""} { regexp {_([^ ]+)$} $Access Match OrgaLu } if {$OrgaLu==""} { continue } set TFA [LaSequenceDuTFAs $FicProtAliTfa $Access] set Entete [EnteteDuTexteTFA $TFA] set SeqProtGap [QueLaSequenceDuTexteTFA $TFA] regsub -all {\-} $SeqProtGap "" SeqProt set MAMAsSeq($Nom,$OrgaLu,prot,gap) 
$SeqProtGap set MAMAsSeq($Nom,$OrgaLu,prot,) $SeqProt lappend MAMAsSeq($Nom,all,prot,gap) $SeqProtGap lappend MAMAsSeq($Nom,all,prot,) $SeqProt } return [MAMAsSeq $Nom $Org $NucOrProt $Gap] } proc MAMAsOrg {{Qui ""}} { set LesOrgs [list hg38 panTro4 panPan1 gorGor3 ponAbe2 nomLeu3 rheMac3 macFas5 papAnu2 chlSab2 nasLar1 rhiRox1 calJac3 saiBol1 tarSyr2 micMur1 otoGar3 tupBel1 mm10 canFam3] if {$Qui=="GetList"} { return $LesOrgs } return [ChoixParmi $LesOrgs] } proc ColumnsOfCodonsInXMotifsPourTous {{Frame ""} {NbCodons ""} {Cardinality ""} {RefOrg ""}} { foreach Nom [ListeDesPABs] { set FichierMsf [GscopeFile $Nom msf] if {[FileAbsent $FichierMsf]} { continue } set NbCops 0 foreach Ligne [LesLignesDuFichier $FichierMsf] { if {[regexp "^//" $Ligne]} { break } if {[regexp "Name:" $Ligne]} { incr NbCops } } set Product [ExtraitInfo $Nom "product:"] set LocusTag [ExtraitInfo $Nom "locus_tag"] set Ligne [format "%2d %-10s %-8s" $NbCops $LocusTag [MDScore $Nom]] append Ligne [ColumnsOfCodonsInXMotifs $Nom $Frame $NbCodons $Cardinality $RefOrg] if {$Ligne==""} { continue } if { ! [regexp {: 0} $Ligne ]} { append Ligne " $Product" } Espionne $Ligne AppendAuFichier [Fiches "tous.txt"] $Ligne } return Bravo } proc ColumnsOfCodonsInXMotifs {Nom {Frame ""} {NbCodons ""} {Cardinality ""} {RefOrg ""}} { #rR Attention 31 en dur !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! if {$NbCodons==""} { set NbCodons 3 } if {$Cardinality==""} { set Cardinality 1 } #rR Attention 31 en dur !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
# Body of ColumnsOfCodonsInXMotifs (its header is on the previous source line).
    # Reads the frame-specific features file of Nom, maps every motif to global
    # alignment columns and returns "Nom : n9 n8 ... n1", where nK is the number
    # of motifs of the reference organism whose most covered column is shared
    # by K sequences. Returns "" when the features file is absent.
    if {$RefOrg==""} { set RefOrg [PreFixe] }
    if {$Frame==""} { set Frame 0 }
    set FicMsf [GscopeFile $Nom "msf"]
    set LesPositions {}
    set FeaturesDir [X${Frame}MotifsFeaturesDir]$NbCodons$Cardinality
    set FeaturesFile "$FeaturesDir/$Nom"
    if {[FileAbsent $FeaturesFile]} { return "" }
    set IemeMotif 0
    set ListOfMotif {}
    # The first line of the features file is its FEATURE header, hence lrange 1.
    foreach Ligne [lrange [LesLignesDuFichier $FeaturesFile] 1 end] {
        lassign [split $Ligne ";"] Access LOCAL DL FL Couleur L C
        set DG [PositionDansMSF "Global" $FicMsf $DL $Access]
        set FG [PositionDansMSF "Global" $FicMsf $FL $Access]
        lappend LesPositions "$Access $DL $FL $DG $FG"
        # Only motifs whose access starts with RefOrg are numbered and counted.
        if {[regexp "^$RefOrg" $Access]} {
            incr IemeMotif
            lappend ListOfMotif $IemeMotif
            set LocalStart($IemeMotif) $DL
            set LocalStop($IemeMotif) $FL
            set GlobalStart($IemeMotif) $DG
            set GlobalStop($IemeMotif) $FG
        }
        # Every sequence marks the global columns covered by its motif.
        foreach G [NombresEntre $DG $FG] {
            lappend LesJaunesDe($G) $Access
        }
    }
    foreach IemeMotif $ListOfMotif {
        set M 0
        foreach G [NombresEntre $GlobalStart($IemeMotif) $GlobalStop($IemeMotif)] {
            set M [Maxi $M [llength $LesJaunesDe($G)]]
        }
        lappend LesM $M
        incr Histo($M)
    }
    # Only the histogram classes 9 down to 1 are reported.
    foreach M [NombresEntre 9 1 -1] {
        if { ! [info exists Histo($M)]} { set N " 0" } else { set N [format "%2d" $Histo($M)] }
        lappend LeMessage $N
    }
    set Message [join $LeMessage " "]
    return "$Nom : $Message"
}

proc InformeColumnsOfCodonsInXMotifs {} {
    # Reads the Fiches file ColumnsOfCodonsInXMotifs.txt and records, for the
    # name found on each line, the NbCops and NbMotifs infos, plus WithXColumn
    # when the 9-sequence class is not zero.
    # NOTE(review): the second word is assigned to Product although
    # ColumnsOfCodonsInXMotifsPourTous writes the locus tag there - confirm.
    foreach Ligne [LesLignesDuFichier [Fiches "ColumnsOfCodonsInXMotifs.txt"]] {
        Espionne $Ligne
        lassign [LesMotsDeLaLigne $Ligne] NbCops Product MDScore Nom pp N9 N8 N7 N6 N5 N4 N3 N2 N1
        InformeSansDemander $Nom "=NbCops: $NbCops"
        InformeSansDemander $Nom "=NbMotifs: $N9 $N8 $N7 $N6 $N5 $N4 $N3 $N2 $N1"
        if {$N9!=0} { InformeSansDemander $Nom "=WithXColumn: yes" }
    }
    return
}

proc InformeNbXMotifsPourTous {} {
    # For every PAB, computes XMotifs with the settings 3/1, 4/1 and 4/4 and
    # prints one summary line with half the length of each result.
    # NOTE(review): the bare continue after Espionne makes the three
    # InformeSansDemander calls and the lappend unreachable, so nothing is
    # recorded and the returned list is always empty - looks like a dry-run
    # switch, confirm before removing it.
    set LaSortie {}
    foreach Nom [ListeDesPABs] {
        set Les31 [XMotifs $Nom 0 3 1]
        set Les41 [XMotifs $Nom 0 4 1]
        set Les44 [XMotifs $Nom 0 4 4]
        set N31 [expr [llength $Les31]/2]
        set N41 [expr [llength $Les41]/2]
        set N44 [expr [llength $Les44]/2]
        set Ligne [format "%s %2d %2d %2d" $Nom $N31 $N41 $N44]
        Espionne $Ligne
        continue
        InformeSansDemander $Nom "=NbXMotifs31: $N31"
        InformeSansDemander $Nom "=NbXMotifs41: $N41"
        InformeSansDemander $Nom "=NbXMotifs44: $N44"
        lappend LaSortie $Ligne
    }
    return $LaSortie
}

proc InformeEHomsaReferencePourTous {} {
    # For every PAB with a blastp file and at least one hit: takes the first
    # bank id returned by DecortiqueBlast, reads the info file of that entry in
    # the EHomsa database (without its first line) and records it for the PAB.
    # Returns the list of PABs that were informed.
    file mkdir "[RepertoireDuGenome]/infos"
    set RepInfoEHomsa "[GscopeDatabaseDir EHomsa]/infos"
    set LesInformes {}
    foreach Nom [ListeDesPABs] {
        set FicBlast [GscopeFile $Nom blastp]
        if {[FileAbsent $FicBlast]} { continue }
        set n [DecortiqueBlast $FicBlast "" "" Query lBanqueId]
        if {$n==0} { continue }
        set Ref [lindex $lBanqueId 0]
        set LesInfosEHomsa [LesLignesDuFichier $RepInfoEHomsa/$Ref]
        set LesInfos [lrange $LesInfosEHomsa 1 end]
        set Infos [join $LesInfos "\n"]
        InformeSansDemander $Nom $Infos
        lappend LesInformes $Nom
    }
    return $LesInformes
}

proc MsfOnOneLine {Nom InDir OutDir {AliTfaDir ""} {ShowCodons ""} {DoNotUnSet ""}} {
    #rR converts an msf using the header of a normal msf and possibly the alignment taken from nucalitfa or protalitfa
    #rR and creates an msf with one line per sequence, keeping the upper/lower case
#rR (continued) of nucalitfa or protalitfa
    # Body of MsfOnOneLine (its header is on the previous source line).
    # Copies the header of InDir/Nom up to the // line, concatenates the
    # alignment blocks of each access onto a single line (optionally replaced by
    # the sequence found in the aligned FASTA, optionally split into codons) and
    # saves the result into OutDir/Nom. Returns what SauveLesLignes returns.
    set ShowCodons [string equal -nocase $ShowCodons "ShowCodons"]
    # When the output directory name contains NucProt, each codon is written in
    # lower case followed by its amino acid.
    set Mixe 0
    if {[regexp "NucProt" $OutDir]} { set Mixe 1 }
    set FicAliTfa ""
    if {$AliTfaDir!=""} {
        if {[regexp "/" $AliTfaDir]} {
            set FicAliTfa $AliTfaDir
        } else {
            set FicAliTfa [GscopeFile $Nom $AliTfaDir]
        }
    }
    if {[FileAbsent $FicAliTfa]} { set FicAliTfa "" }
    set LeMsf {}
    set OnAttendSlashSlash 1
    set OnAttendLAlignement 0
    set OnEstDansLAlignement 0
    set LesAccessSpace {}
    foreach Ligne [LesLignesDuFichier "$InDir/$Nom"] {
        if {$OnAttendSlashSlash} {
            # Header part: copied verbatim, the // line included.
            lappend LeMsf $Ligne
            if {$Ligne!="//"} { continue }
            set OnAttendSlashSlash 0
            set OnAttendLAlignement 1
            continue
        }
        if {$OnAttendLAlignement} {
            if {$Ligne==""} {
                lappend LeMsf $Ligne
                continue
            }
            set OnAttendLAlignement 0
            set OnEstDansLAlignement 1
        }
        if {$Ligne==""} { continue }
        if { ! [regexp -nocase {^([^ ]+ +)([^ ].*$)} $Ligne Match AccessSpace Seq]} {
            FaireLire "Wrong $Ligne"
            # Fix: do not go on with the AccessSpace and Seq of a previous line
            # (or with unset variables) once a malformed line was reported.
            continue
        }
        lappend LesAccessSpace $AccessSpace
        append SeqDe($AccessSpace) $Seq
    }
    foreach AccessSpace [ListeSansDoublon $LesAccessSpace] {
        set Access [string trim $AccessSpace]
        set Seq $SeqDe($AccessSpace)
        if {$FicAliTfa!=""} { set Seq [QueLaSequenceDuTexteTFA [LaSequenceDuTFAs $FicAliTfa $Access]] }
        regsub -all " " $Seq "" Seq
        if {$ShowCodons} {
            set LesCodons {}
            foreach {a b c} [split $Seq ""] {
                set Codon "$a$b$c"
                set P [AAduCodon $Codon]
                if {$P=="X"} { set P "." }
                if {$Mixe} {
                    lappend LesCodons "[string tolower $Codon]$P"
                } else {
                    lappend LesCodons "$a$b$c"
                }
            }
            set Seq [join $LesCodons " "]
        }
        lappend LeMsf "$AccessSpace$Seq"
    }
    # NOTE(review): this comparison is case-sensitive while callers in this file
    # pass "DoNotUnset" (lower-case s), so for them the cache is still released
    # here - confirm which behaviour is wanted before changing either side.
    if {$FicAliTfa!="" && $DoNotUnSet!="DoNotUnSet"} { LaSequenceDuTFAs $FicAliTfa "UnSet" }
    lappend LeMsf ""
    set F [SauveLesLignes $LeMsf dans "$OutDir/$Nom"]
    return $F
}

proc CreateNucAndProtTfaAndMsfFromNucAliTfaPourTous {Rep Prefixe {IndexReference ""}} {
    #rR Must be run by setgscoperr Zero because the project Rep does not exist yet
    # Creates the output directories under Rep, then runs
    # CreateNucAndProtTfaAndMsfFromNucAliTfa for every file of Rep/nucalitfa
    # whose name starts with Prefixe. Returns the number of files processed.
    set RepNAT "$Rep/nucalitfa"
    Espionne $RepNAT
    file mkdir "$Rep/nuctfa"
    file mkdir "$Rep/prottfa"
    file mkdir "$Rep/protalitfa"
    file mkdir "$Rep/msfNuc"
    file mkdir "$Rep/msfNuc3"
    file mkdir "$Rep/msfProt"
    file mkdir "$Rep/msfProt3"
    set N 0
    # Fix: -nocomplain, so that an empty directory yields 0 instead of an error.
    foreach Fichier [lsort [glob -nocomplain $RepNAT/$Prefixe*]] {
        set Nom [file tail $Fichier]
        # Fix: the last output is written to Rep/msfProt3, not to a msfProt3
        # directory inside nucalitfa, so the old test could never detect an
        # already processed name.
        if {[file exists "$Rep/msfProt3/$Nom"]} { continue }
        if {[PasTouche $Nom]} { continue }
        Espionne $Nom
        CreateNucAndProtTfaAndMsfFromNucAliTfa $Nom $Rep $IndexReference
        FinPasTouche $Nom
        incr N
    }
    Espionne "CreateNucAndProtTfaAndMsfFromNucAliTfaPourTous en a fait $N"
    return $N
}

proc CreateProtAliTfaFromNucAliTfaForJoyPourTous {} {
    # Runs CreateProtAliTfaFromNucAliTfaForJoy on every PAB. Returns Bravo.
    set LesR {}
    foreach Nom [ListeDesPABs] {
        set R [CreateProtAliTfaFromNucAliTfaForJoy $Nom]
        #Espionne $Nom $R
    }
    return Bravo
}

proc CreateProtAliTfaFromNucAliTfaForJoy Nom {
    # Translates every aligned nucleotide sequence of the nucalitfa file of Nom
    # codon by codon and saves the result as its protalitfa file.
    # Returns "" when the nucalitfa file is absent.
    set FicNucAliTfa [GscopeFile $Nom "nucalitfa"]
    set FicProtAliTfa [GscopeFile $Nom "protalitfa"]
    if {[FileAbsent $FicNucAliTfa]} { return "" }
    set LeTFAsPro {}
    foreach Access [LaSequenceDuTFAs $FicNucAliTfa "LaListeDesAccess"] {
        set TFA [LaSequenceDuTFAs $FicNucAliTfa $Access]
        set Entete [EnteteDuTexteTFA $TFA]
        set Seq [QueLaSequenceDuTexteTFA $TFA]
        set SeqPro ""
        foreach {a b c} [split $Seq ""] {
            set Codon "$a$b$c"
            set A [AAduCodon $Codon]
            # An X answer becomes a gap and a stop becomes X.
            if {$A=="X"} { set A "-" }
            if {$A=="*"} { set A "X" }
            # NOTE(review): this tests the amino acid, not the codon, so it can
            # only lower-case what is already lower case - presumably the codon
            # was meant; confirm against AAduCodon.
            if {[regexp {[a-z]} $A]} { set A [string tolower $A] }
            append SeqPro $A
        }
        lappend LeTFAsPro $Entete $SeqPro
    }
    return [SauveLesLignes $LeTFAsPro dans $FicProtAliTfa]
}

# The name and body of the next proc are on the following source line.
proc \
CreateNucAliFromProtAliPourTous {} {
    # Runs CreateNucAliFromProtAli on every PAB. Returns Bravo.
    set LesR {}
    foreach Nom [ListeDesPABs] {
        set R [CreateNucAliFromProtAli $Nom]
        #Espionne $Nom $R
    }
    return Bravo
}

proc CreateNucAliFromProtAli Nom {
    # Threads the nucleotide sequences onto the protein alignment msfProt of
    # Nom: every aligned residue is replaced by its codon and every gap by ---.
    # Saves the result as nucalitfa, converts it to msfNuc with clustalw and
    # derives the one-line versions. Returns the msfNuc file name, or "" when
    # there is no usable protein msf.
    set FichierMsfProt [GscopeFile $Nom msfProt]
    if {[FileAbsent $FichierMsfProt]} { return "" }
    set MsfProt [ContenuDuFichier $FichierMsfProt]
    if {[regexp "^NoSequencesToAlign" $MsfProt]} { return "" }
    set FichierMsfTfa [TmpFile].tfa
    exec clustalw $FichierMsfProt -convert -output=FASTA -outfile=$FichierMsfTfa
    set LeMsfTfaNuc {}
    foreach Access [LaSequenceDuTFAs $FichierMsfTfa "LaListeDesAccess"] {
        # The Joy sub-directory is the access without its trailing number.
        regsub {CDS[0-9]+$} $Access CDS Science
        set SeqProtTfa [QueLaSequenceDuFichierTFA "[JoyDir]/$Science/prottfa/$Access"]
        set SeqNucTfa [QueLaSequenceDuFichierTFA "[JoyDir]/$Science/nuctfa/$Access"]
        Espionne $Nom $Access [expr [string length $SeqProtTfa]*3] [string length $SeqNucTfa]
        # Fix: forget the codons of the previous access, otherwise a residue
        # beyond the end of this sequence silently receives a codon that belongs
        # to another sequence.
        array unset CodonEn
        set I -1
        foreach P [split $SeqProtTfa ""] {a b c} [split $SeqNucTfa ""] {
            incr I
            set Codon "$a$b$c"
            set CodonEn($I) $Codon
            #Espionne $Access $I $P [AAduCodon $Codon] $Codon
        }
        set TfaProt [LaSequenceDuTFAs $FichierMsfTfa $Access]
        set SeqProt [QueLaSequenceDuTexteTFA $TfaProt]
        set LaSeqNucAlignee {}
        set I -1
        foreach A [split $SeqProt ""] {
            if {$A=="-" || $A=="."} { lappend LaSeqNucAlignee "---" ; continue }
            incr I
            #Espionne $I $A
            set Codon [string toupper $CodonEn($I)]
            lappend LaSeqNucAlignee $Codon
        }
        lappend LeMsfTfaNuc ">$Access"
        lappend LeMsfTfaNuc [join $LaSeqNucAlignee ""]
    }
    # Fix: release the cache entry and remove the temporary FASTA file, which
    # used to be left behind on every call.
    LaSequenceDuTFAs $FichierMsfTfa "UnSet"
    file delete $FichierMsfTfa
    set FichierNucAliTfa [SauveLesLignes $LeMsfTfaNuc dans [GscopeFile $Nom "nucalitfa"]]
    set FichierMsfNuc [GscopeFile $Nom "msfNuc"]
    exec clustalw $FichierNucAliTfa -convert -output=gscope -outfile=$FichierMsfNuc
    # NOTE(review): the directories are given as relative names here, so these
    # two calls depend on the current working directory - confirm.
    MsfOnOneLine $Nom msfNuc msfNuc3 "" ShowCodons
    MsfOnOneLine $Nom msfNuc msfNucProt3 "" ShowCodons
    return $FichierMsfNuc
}

proc ReRunConvertWithClustalwPourTous {} {
    # Runs ReRunConvertWithClustalw on every PAB. Returns Bravo.
    foreach Nom [ListeDesPABs] {
        Espionne $Nom
        ReRunConvertWithClustalw $Nom
    }
    return "Bravo"
}

proc ReRunConvertWithClustalw {Nom} {
    #rR See the proper procedure further down ..
#rR This one only re-runs the creation of the msf and macsims files
    # Body of ReRunConvertWithClustalw (its header is on the previous source
    # line). Converts the existing nucalitfa and protalitfa files of Nom to msf
    # with clustalw and rebuilds the one-line versions.
    # Returns the list msfNuc, one-line msfNuc3, msfProt, one-line msfProt3, or
    # "" when the nucalitfa file is absent.
    set Rep [RepertoireDuGenome]
    set FicNucTfa "$Rep/nuctfa/$Nom"
    set FicNucAliTfa "$Rep/nucalitfa/$Nom"
    set FicProtAliTfa "$Rep/protalitfa/$Nom"
    set FicProtTfa "$Rep/prottfa/$Nom"
    set FicMsfNuc "$Rep/msfNuc/$Nom"
    set FicMsfNuc3 "$Rep/msfNuc3/$Nom"
    set FicMsfProt "$Rep/msfProt/$Nom"
    set FicMsfProt3 "$Rep/msfProt3/$Nom"
    if {[FileAbsent $FicNucAliTfa]} { return "" }
    exec clustalw $FicNucAliTfa -convert -output=gscope -outfile=$FicMsfNuc
    set FN [MsfOnOneLine $Nom "$Rep/msfNuc" "$Rep/msfNuc3" "nucalitfa" "ShowCodons"]
    # NOTE(review): only the presence of nucalitfa is checked; the protalitfa
    # file is assumed to exist - confirm.
    exec clustalw $FicProtAliTfa -convert -output=gscope -outfile=$FicMsfProt
    # NOTE(review): MsfOnOneLine compares its last argument with "DoNotUnSet"
    # (capital S), so the spelling used here does not match it - confirm.
    set FP [MsfOnOneLine $Nom "$Rep/msfProt" "$Rep/msfProt3" "protalitfa" "" "DoNotUnset"]
    return [list $FicMsfNuc $FN $FicMsfProt $FP]
}

proc CreateNucAndProtTfaAndMsfFromNucAliTfa {Nom Rep {IndexReference ""}} {
    #rR Must be run by setgscoperr Zero because the project Rep does not exist yet
    #rR The nucleotide alignments are available as aligned FASTA.
#rR (continued) MSF files are created with one line per access
    #rR Upper and lower case are kept
    # Body of CreateNucAndProtTfaAndMsfFromNucAliTfa (its header is on the
    # previous source line). From Rep/nucalitfa/Nom it writes: the ungapped
    # nucleotide and protein FASTA of the reference access, the translated
    # alignment protalitfa, and the msf files with their one-line versions.
    # Returns the list of the six files produced.
    set FicNucTfa "$Rep/nuctfa/$Nom"
    set FicNucAliTfa "$Rep/nucalitfa/$Nom"
    set FicProtAliTfa "$Rep/protalitfa/$Nom"
    set FicProtTfa "$Rep/prottfa/$Nom"
    set FicMsfNuc "$Rep/msfNuc/$Nom"
    set FicMsfNuc3 "$Rep/msfNuc3/$Nom"
    set FicMsfProt "$Rep/msfProt/$Nom"
    set FicMsfProt3 "$Rep/msfProt3/$Nom"
    # The reference sequence is the one at IndexReference (default: the first).
    if {$IndexReference==""} { set IndexReference 0 }
    set LesAccess [LaSequenceDuTFAs $FicNucAliTfa "LaListeDesAccess"]
    set AccessReference [lindex $LesAccess $IndexReference]
    set TFA [LaSequenceDuTFAs $FicNucAliTfa $AccessReference]
    set EnteteHs [EnteteDuTexteTFA $TFA]
    set SeqNucHs [QueLaSequenceDuTexteTFA $TFA]
    # Everything that is not a letter (gaps included) is removed.
    regsub -all -nocase {[^a-z]} $SeqNucHs "" SeqNucHsNonGap
    Sauve [SequenceFormatTFA $SeqNucHsNonGap $EnteteHs "nucbrut"] dans $FicNucTfa
    set SeqProHs ""
    foreach {a b c} [split $SeqNucHsNonGap ""] {
        set Codon "$a$b$c"
        set A [AAduCodon $Codon]
        if {$A=="*"} { set A "X" }
        append SeqProHs $A
    }
    Sauve [SequenceFormatTFA $SeqProHs $EnteteHs "protbrut"] dans $FicProtTfa
    set LeTFAsPro {}
    foreach Access $LesAccess {
        set TFA [LaSequenceDuTFAs $FicNucAliTfa $Access]
        set Entete [EnteteDuTexteTFA $TFA]
        set Seq [QueLaSequenceDuTexteTFA $TFA]
        set SeqPro ""
        foreach {a b c} [split $Seq ""] {
            set Codon "$a$b$c"
            set A [AAduCodon $Codon]
            # In the alignment an X answer becomes a gap and a stop becomes X.
            if {$A=="X"} { set A "-" }
            if {$A=="*"} { set A "X" }
            # NOTE(review): tests the amino acid rather than the codon, so this
            # cannot change anything - presumably the codon was meant; confirm.
            if {[regexp {[a-z]} $A]} { set A [string tolower $A] }
            append SeqPro $A
        }
        lappend LeTFAsPro $Entete $SeqPro
    }
    SauveLesLignes $LeTFAsPro dans $FicProtAliTfa
    exec clustalw $FicNucAliTfa -convert -output=gscope -outfile=$FicMsfNuc
    set FN [MsfOnOneLine $Nom "$Rep/msfNuc" "$Rep/msfNuc3" "nucalitfa" "ShowCodons"]
    exec clustalw $FicProtAliTfa -convert -output=gscope -outfile=$FicMsfProt
    # NOTE(review): MsfOnOneLine compares its last argument with "DoNotUnSet"
    # (capital S), so the spelling used here does not match it - confirm.
    set FP [MsfOnOneLine $Nom "$Rep/msfProt" "$Rep/msfProt3" "protalitfa" "" "DoNotUnset"]
    LaSequenceDuTFAs $FicNucAliTfa UnSet
    return [list $FicNucTfa $FicProtTfa $FicMsfNuc $FN $FicMsfProt $FP]
}

proc ExtractFromExonNuc {Access {Fichier ""}} {
    #rR used to get
#rR (continued) only a subset of knowCanonical
    # Body of ExtractFromExonNuc (its header is on the previous source line).
    # Copies from the knownCanonical exon FASTA the consecutive records whose
    # header starts with the access followed by an underscore, and saves them
    # into Fichier (default: a .tfa file named after the access).
    set TestAcc ">${Access}_"
    set FinAccess [expr [string length $TestAcc] - 1]
    if {$Fichier==""} { set Fichier "/home/ripp/circo/$Access.tfa" }
    set LeMorceau {}
    set DejaVu 0
    set OnAttend 1
    set OnAttendLaFin 0
    foreach Ligne [LesLignesDuFichier /home/ripp/circo/knownCanonical.exonNuc.fa] {
        if {$Ligne!=""} {
            # Stop at the first header of another access once ours was found.
            if {$OnAttendLaFin && [string index $Ligne 0 ]==">" && [string range $Ligne 0 $FinAccess] ne $TestAcc} { break }
            if {$OnAttend && [string range $Ligne 0 $FinAccess] ne $TestAcc} { continue }
            if {$OnAttend && [string range $Ligne 0 $FinAccess] eq $TestAcc} {
                set OnAttend 0
                set OnAttendLaFin 1
                set LeMorceau [list $Ligne]
                continue
            }
        }
        if {$OnAttend} { continue }
        lappend LeMorceau $Ligne
    }
    return [SauveLesLignes $LeMorceau dans $Fichier]
}

proc ShowXMotifs {Nom {Frame ""} {N ""} {C ""}} {
    # Displays the nucleotide sequence of Nom in lower case with its X motifs
    # in upper case, above a line carrying the length,cardinality key of each
    # motif and below a ruler with one mark per codon.
    set LesCouples {}
    foreach {K V} [XMotifs $Nom $Frame $N $C] {
        Espionne "$K $V"
        foreach Couple $V {
            Espionne $Couple
            lappend LesCouples $Couple
            set LC([lindex $Couple 0]) $K
        }
    }
    set LesCouples [lsort -integer -index 0 $LesCouples]
    set Seq [string tolower [QueLaSequenceDuFichierTFA [GscopeFile $Nom nuctfa]]]
    set LesDF {}
    foreach Couple $LesCouples {
        lassign $Couple D F
        # F is the start of the last codon: +2 reaches its last base.
        incr F 2
        lappend LesDF $D $F
    }
    Espionne $LesDF
    # LesAvant pairs the end of the previous motif (-1 at first) with the start
    # of the current one, so each iteration emits one gap then one motif.
    set LesAvant [lrange [concat [list -1] $LesDF] 0 end-1]
    set LesApres $LesDF
    Espionne $LesAvant
    Espionne $LesApres
    set Rule [string repeat ". " [expr ([string length $Seq]+2)/3]]
    set SeQ ""
    set Reg ""
    foreach {AvD AvF} $LesAvant {ApD ApF} $LesApres {
        append SeQ [string range $Seq $AvD+1 $AvF-1]
        append SeQ [string toupper [string range $Seq $ApD $ApF]]
        append Reg [string repeat " " [expr $AvF-$AvD-1]]
        set Info "$LC($ApD)"
        append Reg $Info
        append Reg [string repeat "." [expr 1 + $ApF - $ApD - [string length $Info]]]
    }
    if {[info exists ApF]} {
        append SeQ [string range $Seq $ApF+1 end]
    } else {
        # Fix: without any motif the loop never runs and the sequence line used
        # to be displayed empty; show the plain sequence instead.
        set SeQ $Seq
    }
    Affiche "$Rule\n$SeQ\n$Reg" "GrandeLargeur"
}

proc Karim {NomOuSeq {Frame ""} {NbCodons ""} {Cardinality ""}} {
    # Same result shape as XMotifs, but the motifs are detected by the external
    # CircularCode.jar program: returns the array-get list of Result, where
    # Result(length,cardinality) is a list of start-of-first-codon and
    # start-of-last-codon pairs.
    # NomOuSeq: a PAB name, a FASTA file name or a raw sequence.
    if {$NbCodons==""} { set NbCodons 4 }
    if {$Cardinality==""} { set Cardinality 4 }
    if {$Frame==""} { set Frame 0 }
    if {[EstUnPAB $NomOuSeq]} {
        set Seq [QueLaSequenceDuFichierTFA [GscopeFile $NomOuSeq nuctfa]]
    } elseif {[file exists $NomOuSeq]} {
        set Seq [QueLaSequenceDuFichierTFA $NomOuSeq]
    } else {
        set Seq $NomOuSeq
    }
    # NOTE(review): the next line is read as a disabled statement (the frame is
    # selected below on the Fr field of the program output instead) - confirm.
    ################## no no no set Seq [string range $Seq $Frame end]
    set Seq [string toupper $Seq]
    append Seq "AAAAAA"
    set DeleteMe 1
    set Fasta "[TmpFile].tfa"
    Sauve [SequenceFormatTFA $Seq "ForKarim" "nucbrut"] dans $Fasta
    set R [exec java -jar /home/julie/NetBeansProjects/CircularCode/dist/CircularCode.jar -seq $Fasta -window $NbCodons -unique $Cardinality]
    #set R [exec java -jar /home/ripp/karim/cc.jar -seq $Fasta -window $NbCodons -unique $Cardinality]
    if {$DeleteMe} { file delete $Fasta }
    set LesHits {}
    foreach Ligne [split $R "\n"] {
        # Only lines made of a codon list followed by five integers are hits.
        if { ! [regexp {^([^\,]+),([0-9]+),([0-9]+),([0-9]+),([0-9]+),([0-9]+)$} $Ligne Match S D F L Fr C]} { continue }
        if {$Fr!=$Frame} { continue }
        set LesCodons [split $S " "]
        incr D -1
        incr F -3
        incr D -$Frame
        incr F -$Frame
        lappend LesHits [list $D $F $LesCodons]
    }
    foreach Hit $LesHits {
        lassign $Hit D F LesCodons
        set Long [expr {($F-$D)/3+1}]
        if {$Long<$NbCodons} { continue }
        set Card [llength [lsort -unique $LesCodons]]
        if {$Card<$Cardinality} { continue }
        lappend Result($Long,$Card) [list [expr $D + $Frame] [expr $F + $Frame]]
    }
    #parray Result
    #Espionne [array get Result]
    return [array get Result]
}

proc XMotifs {NomOuSeq {Frame ""} {NbCodons ""} {Cardinality ""}} {
    #rR returns the array Result with Result(length,card) containing the list os (startFirstCodon,StartLastCodon)
    # Scans the sequence codon by codon from offset Frame and keeps every
    # maximal run of codons accepted by IsX0 that has at least NbCodons codons
    # and at least Cardinality distinct codons. Positions are 0-based offsets in
    # the unshifted sequence.
    if {$NbCodons==""} { set NbCodons 4 }
    if {$Cardinality==""} { set Cardinality 4 }
    if {$Frame==""} { set Frame 0 }
    set FicNuc [GscopeFile $NomOuSeq nuctfa]
    if {[file exists $FicNuc]} {
        set Seq [QueLaSequenceDuFichierTFA $FicNuc]
    } else {
        set Seq $NomOuSeq
    }
    set Seq [string range $Seq $Frame end]
    set Seq [string toupper $Seq]
    # Padding appended so that a run reaching the end of the sequence is closed.
    append Seq "NNNNNN"
    set I -3
    set D ""
    set F ""
    set LesHits {}
    set LesCodons {}
    foreach {a b c} [split $Seq {}] {
        incr I 3
        set Codon "$a$b$c"
        if {[IsX0 $Codon]} {
            if {$D==""} { set D $I }
            set F $I
            lappend LesCodons $Codon
        } else {
            if {$D==""} { continue }
            lappend LesHits [list $D $F $LesCodons]
            set D ""
            set LesCodons {}
        }
    }
    foreach Hit $LesHits {
        lassign $Hit D F LesCodons
        set Long [expr {($F-$D)/3+1}]
        if {$Long<$NbCodons} { continue }
        set Card [llength [lsort -unique $LesCodons]]
        if {$Card<$Cardinality} { continue }
        lappend Result($Long,$Card) [list [expr $D + $Frame] [expr $F + $Frame]]
    }
    #parray Result
    #Espionne [array get Result]
    return [array get Result]
}

proc IsX0 {Codon} {
    #rR you can get the list of codon from CirCode or X0 or IsX0 (CirCode is the best manner)
    # Returns 1 when Codon belongs to the list returned by CirCode, else 0.
    # The list is cached in the global array IsX0; IsX0 Reset clears the cache.
    # LISTOFCODONS and LISTOFAAS can also be asked for.
    global IsX0
    if {$Codon=="Reset"} {
        if {[info exists IsX0]} { unset IsX0 }
        return
    }
    set LesAA {}
    set Codon [string toupper $Codon]
    if {[info exists IsX0($Codon)]} { return $IsX0($Codon) }
    if {[info exists IsX0("EstCharge")]} { return 0 }
    set IsX0("EstCharge") 1
    set LesX0 [CirCode]
    foreach C $LesX0 { lappend LesAA [AAduCodon $C] }
    foreach X $LesX0 { set IsX0($X) 1 }
    set IsX0(LISTOFCODONS) $LesX0
    set IsX0(LISTOFAAS) $LesAA
    return [IsX0 $Codon]
}

proc X0 {{GetWhat ""}} {
    #rR you can get the list of codon from CirCode or X0 or IsX0 (CirCode is the best manner)
    # Returns the codons of the current circular code (default, or when GetWhat
    # contains codon), otherwise their amino acids, joined with spaces when
    # GetWhat contains NiceA.
    if {$GetWhat==""} { set GetWhat "GetCodons" }
    if {[regexp -nocase "codon" $GetWhat]} { return [CirCode] }
    # Fix: initialised so that an empty code yields an empty answer instead of
    # an unset-variable error.
    set LesAA {}
    foreach Codon [CirCode] { lappend LesAA [AAduCodon $Codon] }
    if {[regexp -nocase "NiceA" $GetWhat]} { return [join $LesAA " "] }
    return $LesAA
}

proc TestRandomCodonMatrix {} {
    # Manual check of RandomCodonMatrix: switches between the codes 01 and 02
    # and prints what is returned.
    Espionne 01 ListOf Codon 0 [RandomCodonMatrix 01 ListOf Codon 0]
    Espionne 02 ListOf Codon 0 [RandomCodonMatrix 02 ListOf Codon 0]
    Espionne RandomCodonMatrix 01 AAC AAT 0 [RandomCodonMatrix 01 AAC AAT 0]
    RandomCodonMatrix 01 AAC AAT 0
    Espionne RandomCodonMatrix 02 ACT AGG 0 [RandomCodonMatrix 02 ACT AGG 0]
    RandomCodonMatrix 02 ACT AGG 0
    Espionne 01 ListOf Codon 0 [RandomCodonMatrix 01 ListOf Codon 0]
}

proc RandomCodonMatrix {R {Qui ""} {Quoi ""} {Frame ""}} {
    # Makes R the default circular code, drops the cached CodonMatrix when R
    # differs from the previous call, then answers through CodonMatrix.
    global CodonMatrix
    global RandomCodonMatrix
    if {[info exists RandomCodonMatrix] && $RandomCodonMatrix!=$R} {
        if {[info exist CodonMatrix]} { unset CodonMatrix }
    }
    set RandomCodonMatrix $R
    CirCode DefaultCirCode $R
    Espionne [CirCode DefaultCirCode] [info exist CodonMatrix]
    return [CodonMatrix $Qui $Quoi $Frame]
}

proc PrintCodonMatrix {Frame {Norm ""} {Random ""}} {
    #rR ATTENTION since 2018/03/07 CodonMatrix is no more a symmetric matrix
    # Returns the codon matrix of Frame as text: two header lines (amino acids
    # and codons of the circular code) then one line per codon of the full
    # list. Norm selects a normalised variant; when it contains tab the columns
    # are tab-separated. Random, when given, becomes the default code first.
    global CodonMatrix ; #rR I added this global to unset it if we change Random
    global IsX0
    global PreviousRandom
    if {$Random!="" && [info exists PreviousRandom] && $PreviousRandom!=$Random} {
        if {[info exists CodonMatrix]} { unset CodonMatrix }
        if {[info exists IsX0]} { unset IsX0 }
    }
    set PreviousRandom $Random
    set Tabulated 0
    if {[regexp -nocase tab $Norm]} {
        set Tabulated 1
        regsub -nocase tab $Norm "" Norm
    }
    if {$Random!=""} { CirCode DefaultCirCode $Random }
    set OnlyCirCodeCodons 0
    if {$OnlyCirCodeCodons} {
        set CodonsFromCirCode [CodonMatrix ListOf Codon $Frame]
    } else {
        set CodonsFromCirCode {}
        foreach Codon [CodonMatrix ListOf Codon $Frame] {
            if {$Codon ni [CirCode]} { continue }
            lappend CodonsFromCirCode $Codon
        }
    }
    foreach CI [CodonMatrix ListOfAll Codon $Frame] {
        foreach CJ [CodonMatrix ListOf Codon $Frame] {
            set V($CI,$CJ) [CodonMatrix $CJ $CI $Norm$Frame] ; #rR Attention CI CJ are exchanged
            #Espionne $CI $CJ =$V($CI,$CJ)=
            if {$V($CI,$CJ)=="650"} { Espionne $CI $CJ }
            if {$V($CI,$CJ)==""} { set V($CI,$CJ) 0 }
        }
    }
    set LesAAs {}
    foreach Codon $CodonsFromCirCode { lappend LesAAs [AAduCodon $Codon] }
    lappend LaSortie ". . ... [join $LesAAs { }]"
    lappend LaSortie ". . ... [join $CodonsFromCirCode { }]"
    ######################################################################### foreach CI [CodonMatrix ListOf Codon $Frame]
    foreach CI [CodonMatrix ListOfAll Codon] {
        set TheResults {}
        foreach CJ $CodonsFromCirCode {
            if {$OnlyCirCodeCodons && $CJ ni [CirCode]} { continue }
            set Value 0
            if {[info exists V($CI,$CJ)]} { set Value $V($CI,$CJ) }
            if {$Norm==""} {
                lappend TheResults [format "%7d" $Value]
            } else {
                set Value [expr round($Value)]
                lappend TheResults [format "%7d" $Value]
            }
        }
        set IsX "."
        if {[IsX0 $CI]} { set IsX "X" }
        lappend LaSortie "[AAduCodon $CI] $IsX $CI [join $TheResults { }]"
    }
    set Sortie [join $LaSortie "\n"]
    if {$Tabulated} { regsub -all { +} $Sortie "\t" Sortie }
    return $Sortie
}

proc PrintBothCodonMatrix {Frame {Norm ""} {Random ""}} {
    # Same layout as PrintCodonMatrix but restricted to the codons of the
    # circular code on both axes; normalised values are scaled by 100000.
    set Tabulated 0
    if {[regexp -nocase tab $Norm]} {
        set Tabulated 1
        regsub -nocase tab $Norm "" Norm
    }
    if {$Random!=""} { CirCode DefaultCirCode $Random }
    set OnlyCirCodeCodons 0
    if {$OnlyCirCodeCodons} {
        set CodonsFromCirCode [CodonMatrix ListOf Codon $Frame]
    } else {
        set CodonsFromCirCode {}
        foreach Codon [CodonMatrix ListOf Codon $Frame] {
            if {$Codon ni [CirCode]} { continue }
            lappend CodonsFromCirCode $Codon
        }
    }
    foreach CI [CodonMatrix ListOf Codon $Frame] {
        foreach CJ [CodonMatrix ListOf Codon $Frame] {
            set V($CI,$CJ) [CodonMatrix $CI $CJ $Norm$Frame]
            if {$V($CI,$CJ)==""} { set V($CI,$CJ) 0 }
        }
    }
    # Fix: the list that is filled and joined is AAs, but LesAAs was the one
    # initialised, so an empty codon list raised an unset-variable error.
    set AAs {}
    foreach Codon $CodonsFromCirCode { lappend AAs [AAduCodon $Codon] }
    lappend LaSortie ". . ... [join $AAs { }]"
    lappend LaSortie ". . ... [join $CodonsFromCirCode { }]"
    foreach CI [CodonMatrix ListOf Codon $Frame] {
        set TheResults {}
        foreach CJ $CodonsFromCirCode {
            if {$OnlyCirCodeCodons && $CJ ni [CirCode]} { continue }
            if {$Norm==""} {
                set Value $V($CI,$CJ)
                lappend TheResults [format "%7d" $Value]
            } else {
                set Value [expr round(100000*$V($CI,$CJ))]
                lappend TheResults [format "%7d" $Value]
            }
        }
        set IsX0 "."
        if {[IsX0 $CI]} { set IsX0 "X" }
        lappend LaSortie "[AAduCodon $CI] $IsX0 $CI [join $TheResults { }]"
    }
    set Sortie [join $LaSortie "\n"]
    if {$Tabulated} { regsub -all { +} $Sortie "\t" Sortie }
    return $Sortie
}

proc CodonMatrixNotModified {{Qui ""} {Quoi ""} {Frame ""}} {
    # Earlier, symmetric variant of CodonMatrix kept for reference. It loads the
    # same global array from the human matrix file, derives the Norm,
    # NormCirCode and NormColumn values, and finally answers through CodonMatrix.
    global CodonMatrix
    set UseDiagonal 1
    set Qui [string toupper $Qui]
    set Quoi [string toupper $Quoi]
    if {[info exists CodonMatrix($Frame,$Qui,$Quoi)]} { return $CodonMatrix($Frame,$Qui,$Quoi) }
    if {[info exists CodonMatrix("EstCharge")]} {
        # Symmetric lookup: try the transposed key before answering 0.
        if {[info exists CodonMatrix($Frame,$Quoi,$Qui)]} { return $CodonMatrix($Frame,$Quoi,$Qui) }
        return 0
    }
    set CodonMatrix("EstCharge") 1
    set OnlyCirCodeCodons 0
    #array set CodonMatrix [ContenuDuFichier [CodonMatrixFile]]
    array set CodonMatrix [ContenuDuFichier [CodonMatrixFileHuman]]
    foreach K [array names CodonMatrix] {
        lassign [split $K ","] F C1 C2
        #if {[regexp -nocase {(count|charge|sum|codon|norm|text|list|frame)} $K]} { continue }
        if { ! [regexp -nocase {^[ATGC][ATGC][ATGC]$} $C1]} { continue }
        lappend CodonMatrix(LISTOF,FRAME,) $F
        lappend CodonMatrix($F,LISTOF,CODON) $C1 $C2
    }
    set CodonMatrix(LISTOF,FRAME,) [lsort -unique $CodonMatrix(LISTOF,FRAME,)]
    foreach F $CodonMatrix(LISTOF,FRAME,) {
        set CodonMatrix($F,LISTOF,CODON) [lsort -unique $CodonMatrix($F,LISTOF,CODON)]
        set Lines {}
        set LinesNorm {}
        set LinesNormCirCode {} ;#rR attention this line was not there before 2018/02/14
        set LinesNormColumn {} ;#rR attention this line was not there before 2018/02/14
        set Sum 0
        set SumCirCode 1
        set HowMany 0
        # First pass: column sums and grand total over the X0 columns.
        foreach CJ $CodonMatrix($F,LISTOF,CODON) {
            set SumForColumn($F,$CJ) 0
            if { ! [IsX0 $CJ] } { continue }
            foreach CI $CodonMatrix($F,LISTOF,CODON) {
                if {($UseDiagonal || $CJ!=$CI) && [info exists CodonMatrix($F,$CI,$CJ)]} {
                    set V $CodonMatrix($F,$CI,$CJ)
                    incr SumForColumn($F,$CJ) $V
                    incr HowMany
                    incr Sum $V
                    if { $CJ in [CirCode]} { incr SumCirCode $V }
                }
            }
        }
        # Second pass: one text row per codon plus the normalised values.
        foreach CI $CodonMatrix($F,LISTOF,CODON) {
            set Line {}
            # Fix: the three normalised rows were never reset, so every row also
            # contained all the values of the rows before it.
            set LineNorm {}
            set LineNormCirCode {}
            set LineNormColumn {}
            foreach CJ $CodonMatrix($F,LISTOF,CODON) {
                if { ! [info exists TotalNormColumn($F,$CJ)]} { set TotalNormColumn($F,$CJ) 0 }
                set V 0
                if {[info exists CodonMatrix($F,$CI,$CJ)]} { set V $CodonMatrix($F,$CI,$CJ) }
                if {[info exists CodonMatrix($F,$CJ,$CI)]} { set V $CodonMatrix($F,$CJ,$CI) }
                lappend Line $V
                # Fix: guard the division like the column normalisation below.
                if {$Sum==0} { set Norm 0 } else { set Norm [expr (1.0*$V)/$Sum] }
                lappend LineNorm $Norm
                set CodonMatrix(Norm$F,$CI,$CJ) $Norm
                set NormCirCode [expr (1.0*$V)/$SumCirCode]
                lappend LineNormCirCode $NormCirCode
                set CodonMatrix(NormCirCode$F,$CI,$CJ) $NormCirCode
                if {$SumForColumn($F,$CJ)==0} { set NormColumn 0 } else { set NormColumn [expr (0.01*$V)/$SumForColumn($F,$CJ)] }
                set TotalNormColumn($F,$CJ) [expr $TotalNormColumn($F,$CJ)+$NormColumn]
                lappend LineNormColumn $NormColumn
                set CodonMatrix(NormColumn$F,$CI,$CJ) $NormColumn
            }
            lappend Lines [join $Line "\t"]
            lappend LinesNorm [join $LineNorm "\t"]
            lappend LinesNormCirCode [join $LineNormCirCode "\t"]
            lappend LinesNormColumn [join $LineNormColumn "\t"]
        }
        foreach CJ $CodonMatrix($F,LISTOF,CODON) {
            if { ! [IsX0 $CJ] } { continue }
            #Espionne $F $CJ $TotalNormColumn($F,$CJ) $SumForColumn($F,$CJ)
        }
        set CodonMatrix($F,GET,HOWMANY) $HowMany
        set CodonMatrix($F,GET,TEXT) [join $Lines "\n"]
        set CodonMatrix($F,GET,NORM) [join $LinesNorm "\n"]
        set CodonMatrix($F,GET,NORMCIRCODE) [join $LinesNormCirCode "\n"]
        set CodonMatrix($F,GET,NORMCOLUMN) [join $LinesNormColumn "\n"]
        set CodonMatrix($F,SUM,SUBSTITUTION) $Sum
        set CodonMatrix($F,SUMCIRCODE,SUBSTITUTION) $SumCirCode
    }
    return [CodonMatrix $Qui $Quoi $Frame]
}

proc CodonMatrix {{Qui ""} {Quoi ""} {Frame ""}} {
    #rR there is a problem with the defintion of CodonMatrix(LISTOF,CODON) do we use 20 or 57 or 64 ?
global CodonMatrix set Qui [string toupper $Qui] set Quoi [string toupper $Quoi] if {[info exists CodonMatrix($Frame,$Qui,$Quoi)]} { return $CodonMatrix($Frame,$Qui,$Quoi) } if {[info exists CodonMatrix("EstCharge")]} { return 0 } set CodonMatrix("EstCharge") 1 set OnlyCirCodeCodons 0 #array set CodonMatrix [ContenuDuFichier [CodonMatrixFile]] array set CodonMatrix [ContenuDuFichier [CodonMatrixFileHuman]] foreach K [array names CodonMatrix] { lassign [split $K ","] F C1 C2 #if {[regexp -nocase {(count|charge|sum|codon|norm|text|list|frame)} $K]} { continue } if { ! [regexp -nocase {^[ATGC][ATGC][ATGC]$} $C1]} { continue } lappend CodonMatrix(LISTOF,FRAME,) $F lappend CodonMatrix($F,LISTOFALL,CODON) $C1 $C2 lappend CodonMatrix(,LISTOFALL,CODON) $C1 $C2 } set CodonMatrix(,LISTOFALL,CODON) [lsort -unique $CodonMatrix(,LISTOFALL,CODON)] set CodonMatrix(LISTOF,FRAME,) [lsort -unique $CodonMatrix(LISTOF,FRAME,)] foreach F $CodonMatrix(LISTOF,FRAME,) { if { ! [info exists CodonMatrix($F,LISTOFALL,CODON)]} { set CodonMatrix($F,LISTOFALL,CODON) {} } set CodonMatrix($F,LISTOFALL,CODON) [lsort -unique $CodonMatrix($F,LISTOFALL,CODON)] set CodonMatrix($F,LISTOF,CODON) [CirCode] #Espionne CodonMatrix($F,LISTOF,CODON) [CirCode] set Lines {} set LinesNorm {} set LinesNormCirCode {} set LinesNormColumn {} set Sum 0 set SumCirCode 1 set HowMany 0 #rR Here ? foreach CJ $CodonMatrix($F,LISTOF,CODON) { set SumForColumn($F,$CJ) 0 #if { ! [IsX0 $CJ] } { continue } foreach CI $CodonMatrix($F,LISTOFALL,CODON) { if {[info exists CodonMatrix($F,$CJ,$CI)]} { set V $CodonMatrix($F,$CJ,$CI) #Espionne $V incr SumForColumn($F,$CJ) $V incr HowMany incr Sum $V #if { $CJ in [CirCode]} { incr SumCirCode $V } } } #Espionne $CJ $SumForColumn($F,$CJ) } #rR Here ? foreach CJ $CodonMatrix($F,LISTOF,CODON) { set Line {} foreach CI $CodonMatrix($F,LISTOFALL,CODON) { if { ! 
[info exists TotalNormColumn($F,$CJ)]} { set TotalNormColumn($F,$CJ) 0 } set V 0 if {[info exists CodonMatrix($F,$CJ,$CI)]} { set V $CodonMatrix($F,$CJ,$CI) } #if {[info exists CodonMatrix($F,$CJ,$CI)]} { set V $CodonMatrix($F,$CJ,$CI) } lappend Line $V set Norm [expr (1.0*$V)/$Sum] lappend LineNorm $Norm set CodonMatrix(Norm$F,$CI,$CJ) $Norm #set NormCirCode [expr (1.0*$V)/$SumCirCode] #lappend LineNormCirCode $NormCirCode #set CodonMatrix(NormCirCode$F,$CJ,$CI) $NormCirCode if {$SumForColumn($F,$CJ)==0} { set NormColumn 0 } else { set NormColumn [expr (1000.0*$V)/$SumForColumn($F,$CJ)] } set TotalNormColumn($F,$CJ) [expr $TotalNormColumn($F,$CJ)+$NormColumn] lappend LineNormColumn $NormColumn set CodonMatrix(NormColumn$F,$CJ,$CI) $NormColumn #Espionne $SumForColumn($F,$CJ) } lappend Lines [join $Line "\t"] lappend LinesNorm [join $LineNorm "\t"] #lappend LinesNormCirCode [join $LineNormCirCode "\t"] lappend LinesNormColumn [join $LineNormColumn "\t"] } #foreach CJ $CodonMatrix($F,LISTOF,CODON) { #if { ! [IsX0 $CJ] } { continue } #Espionne $F $CJ $TotalNormColumn($F,$CJ) $SumForColumn($F,$CJ) #} set CodonMatrix($F,GET,HOWMANY) $HowMany set CodonMatrix($F,GET,TEXT) [join $Lines "\n"] set CodonMatrix($F,GET,NORM) [join $LinesNorm "\n"] #set CodonMatrix($F,GET,NORMCIRCODE) [join $LinesNormCirCode "\n"] set CodonMatrix($F,GET,NORMCOLUMN) [join $LinesNormColumn "\n"] set CodonMatrix($F,SUM,SUBSTITUTION) $Sum set CodonMatrix($F,SUMCIRCODE,SUBSTITUTION) $SumCirCode #set CodonMatrix($F,LISTOF,CODON) [CirCode] ;#rR Outside we use only 20 codons } return [CodonMatrix $Qui $Quoi $Frame] } proc CodonMatrixRaymond {{Qui ""} {Quoi ""} {Frame ""}} { #rR there is a problem with the defintion of CodonMatrix(LISTOF,CODON) do we use 20 or 57 or 64 ? 
global CodonMatrix set UseDiagonal 1 set Qui [string toupper $Qui] set Quoi [string toupper $Quoi] if {[info exists CodonMatrix($Frame,$Qui,$Quoi)]} { return $CodonMatrix($Frame,$Qui,$Quoi) } if {[info exists CodonMatrix("EstCharge")]} { return 0 } set CodonMatrix("EstCharge") 1 set OnlyCirCodeCodons 0 #array set CodonMatrix [ContenuDuFichier [CodonMatrixFile]] array set CodonMatrix [ContenuDuFichier [CodonMatrixFileHuman]] foreach K [array names CodonMatrix] { lassign [split $K ","] F C1 C2 #if {[regexp -nocase {(count|charge|sum|codon|norm|text|list|frame)} $K]} { continue } if { ! [regexp -nocase {^[ATGC][ATGC][ATGC]$} $C1]} { continue } lappend CodonMatrix(LISTOF,FRAME,) $F lappend CodonMatrix($F,LISTOFALL,CODON) $C1 $C2 lappend CodonMatrix(,LISTOFALL,CODON) $C1 $C2 } set CodonMatrix(,LISTOFALL,CODON) [lsort -unique $CodonMatrix(,LISTOFALL,CODON)] set CodonMatrix(LISTOF,FRAME,) [lsort -unique $CodonMatrix(LISTOF,FRAME,)] foreach F $CodonMatrix(LISTOF,FRAME,) { if { ! [info exists CodonMatrix($F,LISTOFALL,CODON)]} { set CodonMatrix($F,LISTOFALL,CODON) {} } set CodonMatrix($F,LISTOFALL,CODON) [lsort -unique $CodonMatrix($F,LISTOFALL,CODON)] set Lines {} set LinesNorm {} set LinesNormCirCode {} set LinesNormColumn {} set Sum 0 set SumCirCode 1 set HowMany 0 #rR Here ? foreach CJ $CodonMatrix($F,LISTOFALL,CODON) { set SumForColumn($F,$CJ) 0 if { ! [IsX0 $CJ] } { continue } foreach CI $CodonMatrix($F,LISTOFALL,CODON) { if {($UseDiagonal || $CJ!=$CI) && [info exists CodonMatrix($F,$CI,$CJ)]} { set V $CodonMatrix($F,$CI,$CJ) incr SumForColumn($F,$CJ) $V incr HowMany incr Sum $V if { $CJ in [CirCode]} { incr SumCirCode $V } } } } #rR Here ? foreach CI $CodonMatrix($F,LISTOFALL,CODON) { set Line {} foreach CJ $CodonMatrix($F,LISTOFALL,CODON) { if { ! 
[info exists TotalNormColumn($F,$CJ)]} { set TotalNormColumn($F,$CJ) 0 } set V 0 #if {[info exists CodonMatrix($F,$CI,$CJ)]} { set V $CodonMatrix($F,$CI,$CJ) } #if {[info exists CodonMatrix($F,$CJ,$CI)]} { set V $CodonMatrix($F,$CJ,$CI) } lappend Line $V set Norm [expr (1.0*$V)/$Sum] lappend LineNorm $Norm set CodonMatrix(Norm$F,$CI,$CJ) $Norm set NormCirCode [expr (1.0*$V)/$SumCirCode] lappend LineNormCirCode $NormCirCode set CodonMatrix(NormCirCode$F,$CI,$CJ) $NormCirCode if {$SumForColumn($F,$CJ)==0} { set NormColumn 0 } else { set NormColumn [expr (0.01*$V)/$SumForColumn($F,$CJ)] } set TotalNormColumn($F,$CJ) [expr $TotalNormColumn($F,$CJ)+$NormColumn] lappend LineNormColumn $NormColumn set CodonMatrix(NormColumn$F,$CI,$CJ) $NormColumn } lappend Lines [join $Line "\t"] lappend LinesNorm [join $LineNorm "\t"] lappend LinesNormCirCode [join $LineNormCirCode "\t"] lappend LinesNormColumn [join $LineNormColumn "\t"] } foreach CJ $CodonMatrix($F,LISTOFALL,CODON) { if { ! [IsX0 $CJ] } { continue } #Espionne $F $CJ $TotalNormColumn($F,$CJ) $SumForColumn($F,$CJ) } set CodonMatrix($F,GET,HOWMANY) $HowMany set CodonMatrix($F,GET,TEXT) [join $Lines "\n"] set CodonMatrix($F,GET,NORM) [join $LinesNorm "\n"] set CodonMatrix($F,GET,NORMCIRCODE) [join $LinesNormCirCode "\n"] set CodonMatrix($F,GET,NORMCOLUMN) [join $LinesNormColumn "\n"] set CodonMatrix($F,SUM,SUBSTITUTION) $Sum set CodonMatrix($F,SUMCIRCODE,SUBSTITUTION) $SumCirCode set CodonMatrix($F,LISTOF,CODON) [CirCode] ;#rR Outside we use only 20 codons } return [CodonMatrix $Qui $Quoi $Frame] } proc Sum {F} { set Whole 0 set Sum 0 set Upper 0 foreach CI [CodonMatrix LISTOF CODON $F] { foreach CJ [CodonMatrix LISTOF CODON $F] { if { ! 
[IsX0 $CJ] } { continue } set V [CodonMatrix $CI $CJ $F] #Espionne "$CI $CJ $V" incr Whole $V if {[info exists AlreadySeen($CJ,$CI)]} { incr Upper $V ; continue } set AlreadySeen($CI,$CJ) 1 incr Sum $V incr J } incr I } return "$Upper $Sum $Whole" } proc TestCM {} { array set CodonMatrix [ContenuDuFichier "/gl/MAMF/01_CodonMatrix.txt"] parray CodonMatrix set Sum 0 foreach K [array names CodonMatrix "0,*"] { if {[regexp -nocase count $K]} { continue } Espionne $K incr Sum $CodonMatrix($K) } Espionne $Sum return $Sum } proc TestCodonMatrix {} { set T 0 foreach V [LesMotsDuTexte [CodonMatrix Get Text 0]] { incr T $V } Espionne $T Espionne [CodonMatrix Sum Substitution 0] set T64 [CodonMatrix Sum Substitution 3] set T20 [CodonMatrix Sum Substitution 0] set T64_20 [expr ($T64*20)/64] set R [expr ($T20*100)/$T64_20] Espionne $R return } proc TestCirCode {} { Espionne [CirCode] Espionne [CirCode DefaultCirCode] Espionne [CirCode DefaultCirCode 01] Espionne [CirCode] Espionne [CirCode DefaultCirCode] Espionne [CirCode] Espionne [CirCode DefaultCirCode 00] Espionne [CirCode DefaultCirCode] Espionne [CirCode] } proc CirCodeFor {CC {Qui ""} {Quoi ""}} { #don't use it !!! set Memo [CirCode DefaultCirCode] CirCode DefaultCirCode $CC Espionne [CirCode DefaultCirCode] set Value [CirCode $Qui $Quoi] CirCode DefaultCirCode $Memo return $Value } proc CreeCirCode {} { set I 30 set LaListe {} NousAllonsAuBoulot "/gstock" foreach Ligne [LesLignesDuFichier /gl/MAMA/70] { incr I if {[FileAbsent MAMA$I]} { exec ln -s MAMA MAMA$I } if {[FileAbsent /gl/MAMA$I]} { exec ln -s /gstock/MAMA$I /gl/MAMA$I } set Ligne [string trim $Ligne] lappend LaListe " set R($I) \[list $Ligne\]" } OnRevientDuBoulot return $LaListe exit } proc CirCode {{Qui ""} {Quoi ""}} { #rR Be carefull : if you don't set DefaultCircode it is 00 except if you are in setgscope MAMAxy (01 ... 30) #rR ... 
and you have to be in the same runing process when you set DefaultCircode #rR #rR you can get the list of codon from CirCode or X0 or IsX0 (CirCode is the best manner) #rR CirCode xy returns the corresponding list of codons #rR CirCode DefaultCirCode returns the DefaultCirCode value (ie 00 or 01 ... or 30) #rR CirCode DefaultCirCode xy sets the DefaultCirCode to xy #rR CirCode ListOf Codon from the DefaultCirCode #rR CirCode ListOf AA from the DefaultCirCode (1 letter amino acid) #rR CirCode ListOf AA3 from the DefaultCirCode (3 letters amino acid) global DefaultCirCode set Qui [string toupper $Qui] set Quoi [string toupper $Quoi] set R(00) [list AAC AAT ACC ATC ATT CAG CTC CTG GAA GAC GAG GAT GCC GGC GGT GTA GTC GTT TAC TTC] set R(01) [list AAC AAT ACA ACT AGG ATA ATT CAA CAG CTC CTG GCC GCG GCT GGC GTA GTC GTT TGC TGT] set R(02) [list ACT AGG AGT ATA ATG CAA CAG CCA CCG CTC GAA GAG GCT GGC TAC TAT TCC TCT TGG TTG] set R(03) [list AAT ACC AGA ATT CCT CGA CGG CTA CTC CTG GAA GAC GAT GCC GCG GGT GTG TAC TAT TTA] set R(04) [list ACC AGA AGG ATA ATC CCG CCT CGC CTA CTC CTG CTT GAA GAT GCA GGA GTA GTG TGT TTA] set R(05) [list AAC AAG ACA AGC CCA CGA CTG CTT GAG GCA GCC GGC GTA GTT TAC TAT TCA TCT TGG TGT] set R(06) [list AAC ACG AGA AGG ATA CCT CGC CGG CGT CTA CTG GAA GAC GCT GTA TAT TCC TGC TTA TTG] set R(07) [list AAT ACT AGT ATC CAA CCT CGA CTG GAC GAG GAT GCA GCG GCT GTA TAC TAT TCC TCG TGG] set R(08) [list AAG ACG AGG ATA ATG CAT CCA CGG CGT CTT GAC GCA GCC GCT GGC TAC TAT TCA TTA TTG] set R(09) [list AAG ACG ACT AGG AGT ATC CAA CAG CCT CGA CGC CTG CTT GCA GGT GTG GTT TAC TAT TCA] set R(10) [list ACC ACT AGA AGT ATG CAG CAT CCA CGC CTT GAA GAT GCA GCT GGA GGC GTA TCT TGT TTC] set R(11) [list AAC AAT ACA ACT AGG AGT ATA CAT CGC CTA CTC GAC GCA GCC GGC GTC GTT TGG TGT TTG] set R(12) [list AAG ACG ACT AGG ATA CAA CAC CAG CCT CGT CTA GAG GCA GCC GTG GTT TAT TCG TCT TTG] set R(13) [list AAC AAT AGA AGC ATT CAG CCT CGC CGG CTA GAC GAT GCC GGC GGT GTA TAT TCA TTC TTG] 
set R(14) [list ACA ACC ACG AGA AGG ATC ATT CAC CAT CCT CGT GAG GCA GGA GGC GTT TGC TGT TTA TTC] set R(15) [list AAC ACA ACC AGC AGG ATA CAT CGC CGG CTT GCA GGA GTA GTG TAC TCG TCT TGG TTA TTC] set R(16) [list AAC ACC ACG ACT AGA AGG ATA ATC ATG CAA CCG CCT CGT GCG GTG TCG TGG TTA TTC TTG] set R(17) [list AAC AAG ACA ACG ACT AGC AGG CAT CGA CTA CTC GAG GGA GTT TCC TCT TGC TGG TTC TTG] set R(18) [list AAT ACT AGC AGG ATA ATC ATG CAA CGC CTA CTC GAG GCC GCT GTA GTC GTT TAC TCG TGG] set R(19) [list AAC AGA AGG AGT ATA CAG CCA CGA CGG CTA CTG CTT GAC GGA GTC GTT TAT TCC TCG TTC] set R(20) [list AAT AGA AGG CAA CAG CCA CGG CTC CTG GAC GCA GCT GTA GTC GTG TAC TAT TCA TTC TTG] set R(21) [list AAG ACA ACC ACT ATA ATG ATT CAA CAT CGA CGG CTG CTT GAG GCG GCT GGC GTG TCT TTC] set R(22) [list ACA AGG AGT ATG ATT CAT CCG CGA CTC CTT GAA GAC GAG GCA GCG GTA TAC TCT TGC TTC] set R(23) [list AAC ACA ACT AGC ATA CAA CAT CCG CTT GAA GAC GCG GCT GGT GTA GTC TCG TCT TGG TGT] set R(24) [list ACA AGA AGT ATA ATC CAG CAT CCT CGC CTA CTC GAG GCG GGA GTA TAC TCG TCT TGG TGT] set R(25) [list ACG ACT AGG ATC CAA CAG CAT CGA CTG GAT GCC GCG GGT GTA GTC TAC TAT TCA TGC TTA] set R(26) [list AAT ATC ATG CAA CAC CAG CAT CGG CGT CTA GAC GAG GCG GGC GTA TAT TCA TCT TGC TGT] set R(27) [list ACA ACC ACG AGT ATG CAG CTA CTG GAC GCA GCC GCT GGA GTA GTC GTT TAT TCA TCG TTA] set R(28) [list AAC AAT ACA AGG AGT ATC ATT CAA CAC CAG CCG CGG CTG CTT GAT GCG GGT TCG TCT TGT] set R(29) [list ACA AGG AGT ATC ATG CCG CGA CTA GAG GCA GCG GGA GTA TAC TCA TCC TCG TCT TTA TTC] set R(30) [list AAG AAT ACC ATA ATT CAA CAC CCA CGG CGT CTG GAC GCA GCG GGC GTC GTT TAT TGT TTG] set R(31) [list AGA AGC AGG AGT ATA ATG CAA CCG CGA CGT CTA CTT GAT GCA GCC GTA GTT TCC TGC TTC] set R(32) [list AAT ACA ACG ATA ATC ATG ATT CAA CGC CGG CTA CTC GAC GCT GGC GGT GTC TCA TGG TGT] set R(33) [list ACA AGT ATC ATG CAA CAT CCT CGA CTA CTG CTT GAC GAT GCG GGA GGC GTA GTT TCA TCG] set R(34) [list AAT ACA ACT AGG ATA ATC ATG CCG CCT CTA CTT 
GAA GAT GCC GCT GGA GGC GTC GTG TTC] set R(35) [list AAC ACG ACT ATT CAA CAC CGA CTA CTC GAC GCC GCG GCT GGA GGT GTA TAT TGG TGT TTA] set R(36) [list AAC AAG ACA ACG AGC ATA ATG CCA CGG CTC CTG GGT GTA GTC GTT TAC TCA TCT TGC TGG] set R(37) [list AAT ACC ACT AGC AGT ATG CAA CAC CGG CTC GAG GAT GCC GTA GTC GTG GTT TCA TCG TTA] set R(38) [list AAC ACG ACT AGC ATG CAA CAC CCA CTT GAC GCA GGA GGT GTA GTC GTT TCC TGG TGT TTA] set R(39) [list AAG ACC ACG AGC AGG ATG ATT CAG CCT CTA CTC CTT GAC GCA GGC GGT TAC TAT TGG TTA] set R(40) [list AAG ACA AGT ATA ATC ATG ATT CAG CAT CCA CGG CTC CTG GAC GCA GCG GCT GTC TGT TTG] set R(41) [list AAC AAT ACA ACG AGC AGG AGT ATC CAT CGG CTC GCT GGA GTA TAC TCC TGC TGG TTC TTG] set R(42) [list AAC ACA ACC ACG ATT CAA CGT CTA CTG CTT GAC GCG GCT GGA GGC GTA TCA TGG TTA TTG] set R(43) [list AAC ACC ACT AGA AGG ATA ATC ATG CAG CCA CGG CTT GAC GCT GGC GGT GTC GTT TTA TTC] set R(44) [list AAG AAT ACC AGG AGT ATT CAA CAC CAT CGA CTA CTC GCG GTC GTG TAC TCG TGC TGG TTG] set R(45) [list AAC AAT ACA ACT AGC AGG ATG CCG CCT CGC CTG GAG GAT GCA GTC TAC TCT TGG TGT TTA] set R(46) [list ACG ACT AGA AGC AGT ATA CAA CAG CAT CCG CTT GAC GCC GCG GCT GTC GTT TAT TGG TTA] set R(47) [list ACC AGC AGG ATA ATC ATT CAC CAG CCA GAG GAT GCC GGC GTA GTC TCA TCG TGT TTA TTG] set R(48) [list AAC AAG ACT AGA AGC ATC CAA CAC CGA CTT GAT GCG GCT GGC GGT TAT TCC TGC TGT TTG] set R(49) [list AAG AAT ACC ACG ATG CAA CCG CCT CGA CGT CTA CTT GAA GGA GTA GTG TCG TGC TGT TTC] set R(50) [list AAC AAG AGC AGG ATC ATG ATT CAC CAG CAT CCA CCT CTT GAG GAT GCG GCT GGT TGT TTC] set R(51) [list AAC AAG AGC AGG CAA CAC CCT GAG GCT GGA GGC GTC GTG TAC TAT TCA TCC TTA TTC TTG] set R(52) [list AAC AAG ACG ACT AGT ATT CAC CCA CCT CGG CTC CTT GAG GAT GGA GTA TCA TCG TGG TTG] set R(53) [list AAG AAT ACA ACC ACT AGT CAC CCG CGC CGT GAG GCT GGC GGT GTA GTT TAC TAT TCA TGT] set R(54) [list AAT ACC AGC AGG ATA CAC CCA CCT CGA CTG GAG GGA GGT GTG GTT TAC TAT TCA TCG TCT] set R(55) [list AAC 
AAG ACC ACT AGA AGC AGT ATA CGA CGT CTC CTG CTT GCA GGC GTA GTC GTG GTT TTC] set R(56) [list AAG ACG ACT AGA ATA CAG CAT CCA CGC CGG CGT CTA GCA GCT GGC GTT TAC TGT TTA TTG] set R(57) [list AAC AAG ACC ACG ACT AGG AGT ATT CAG CGC CTT GCA GCC GGC GGT GTA TCA TTA TTC TTG] set R(58) [list AAC AAG ACA ACT AGA AGC ATG CAC CGC CTG CTT GCT GGA GTA GTT TCA TCC TCG TGG TGT] set R(59) [list AAG AAT ACC ACG AGC ATC ATG CGG CTA CTG CTT GAA GCA GCG GTG TAT TCA TCC TCG TTG] set R(60) [list AAG ACG AGA AGC AGT ATT CCA CCG CCT CTA GAA GAT GCC GCT GGA GTC TAT TCG TTC TTG] set R(61) [list AGA ATC ATG ATT CAA CAC CAT CCG CCT CTT GAA GAG GAT GCA GCG GTC GTG TAC TGC TTG] set R(62) [list AAG AAT ATC ATG ATT CAA CCA CCT CTC GAG GAT GCA GCG GCT GGC GTA GTC TCA TCG TTG] set R(63) [list ACA AGT ATA ATG CAA CCG CCT CTA CTG GAA GAC GCC GGC GTA GTG GTT TAC TAT TCC TGG] set R(64) [list AAC AAG ACG ATA ATG CAA CCA CCT CGA CGC CGG CTA GAG GCT GTC GTT TCG TGT TTA TTG] set R(65) [list AAC ACC ACT AGC AGT ATA CAC CAG CAT CGG GAA GCC GCG GTT TAC TGC TGG TGT TTA TTG] set R(66) [list ACC AGA AGC AGG AGT ATG ATT CAA CTA CTC GAG GCA GGC GTC GTG TCA TCC TCG TCT TTA] set R(67) [list AAC ACA ACG AGC AGG ATA ATC CCA CGC CTG CTT GAT GGC GGT GTC TAC TAT TCG TGG TTA] set R(68) [list ACC ACG ACT AGG ATC ATG ATT CAG CAT CCA CCG CGC CTG CTT GAA GAG GAT GGT GTT TAT] set R(69) [list AAC AAG ACC AGG ATT CAG CCA CCT CTA CTT GAC GAG GCA GCT GGA GTT TAT TCT TGC TGG] set R(70) [list AAT ACC ACG ACT AGA ATT CAT CCG CGC CGT CTC CTG GAA GAC GAG GCG TAT TGG TTA TTG] set R(71) [list AAC AAT ACG ACT ATC ATT CAA CCG CCT CGC CGG CTA GAG GAT GCG GGA GGT TAT TCT TTG] set R(72) [list AAC ACG ACT AGC ATC CAT CGT CTA GAA GAC GCC GGA GGC GTA GTT TCA TCG TGC TGT TTA] set R(73) [list AAC ACG AGG ATT CAC CAG CAT CCA CGA CGG CGT GCA GCT GGA TAC TAT TGG TTA TTC TTG] set R(74) [list AAG ATA ATT CAA CAC CGA CGG CGT CTC CTG GAA GAC GAG GAT GTC GTT TAC TCC TGC TTG] set R(75) [list AAG AGA AGG ATC ATG CAC CAT CCA CCT GAG GCA GCC GGT GTA GTC TAT 
TCA TCT TGC TGT] set R(76) [list AAG AAT ACT AGA AGG ATT CAC CCT CGC CGT CTA CTC CTT GAG GGA GGC GTA GTG TAC TCT] set R(77) [list AAC AAT ACC ACG ACT AGG ATC ATG ATT CAG CGT CTC GAC GCA GGC GGT GTG GTT TAC TTC] set R(78) [list AAT ACA AGC ATA ATT CAA CCT CGG CGT CTC CTG CTT GAA GAG GCC GGA GGT GTC TCA TGT] set R(79) [list AAT ACA ACC AGA ATA ATG ATT CAG CAT CCT CGT CTG GAT GCA GCC GGC GTC GTG TCG TGT] set R(80) [list AAC AAT ACA AGA AGG ATC ATT CAG CAT CGG CTA GAT GCC GCT GGC GTC GTT TCG TCT TGC] set R(81) [list AAC AAT ACA ACC AGA ATA ATC ATG CCG CGT CTC GCA GCG GCT GGA GTC GTG TCT TGT TTG] set R(82) [list ACC ACG AGC AGT ATA ATC CAA CAG CGT CTA CTC CTG CTT GAC GCG GGA GTA TAT TGG TGT] set R(83) [list ACA AGA ATG ATT CAC CAG CCT CGC CTC CTT GAA GAT GCA GCG GCT GGA GTG TAT TCA TTG] set R(84) [list AAC AAT ACC ACG AGC AGG CAA CCG CGA CGT CTT GAC GCA GCT GGA GTT TCT TGT TTA TTG] set R(85) [list AAC AAT ACA ATA ATC CAG CCG CGT CTG GCA GCT GGA GGC GGT GTA GTC TCA TCT TGC TTA] set R(86) [list ACG AGA AGC ATA CAA CAG CCA CCT CGG CGT GAA GCG GGT GTA TCA TCT TGC TTA TTC TTG] set R(87) [list ACA ACC ACG ATC ATT CAA CCA CCT CGA GAA GAC GAG GCA GTG GTT TGC TGG TGT TTC TTG] set R(88) [list AAC AAG ACC ACT AGC ATC ATG ATT CGA CGC CGG GAT GGC GTC GTG TAC TCA TGG TTA TTC] set R(89) [list AAC AAG ACA AGA AGT ATT CAG CCT GAT GCC GGA GGC GGT TAC TCA TCC TCG TGC TGT TTC] set R(90) [list AAT ACT AGC AGG ATC ATG CAA CAC CGT CTA GAC GCA GCG GCT GGT GTA GTC TCG TCT TTA] set R(91) [list AAT ACC AGT ATC ATG ATT CAG CAT CGC CTA GAA GAC GAG GCG GCT GGT GTT TCA TCC TCG] set R(92) [list AAG ACA AGG ATA ATG CAA CCT CGA CGC CGG CTG CTT GAA GAC TCA TCG TCT TGC TGT TTG] set R(93) [list AAG AGT ATG ATT CAA CAC CAG CCA CCG CGA CGT GAT GCG GCT GTA GTC TAC TCG TTA TTG] set R(94) [list AAC AAG ACC ACG ACT AGT ATT CCA CGC CGT CTG GAA GAC GAG GGT GTA GTT TCC TCT TTG] set R(95) [list AAG ACC ACG AGC AGG AGT ATA ATT CCG CGA CGC CGG CTT GAT GCC GTA TAT TCT TGC TTA] set R(96) [list AAT ACA ACG AGA AGC CAC 
CTC CTT GAC GAT GCA GCG GGA GTG TCA TCG TCT TGG TTA TTC] set R(97) [list AAC ACT AGA AGG AGT CAA CGA CGG CGT CTA CTC CTT GAA GAT GCG GCT GGC TCT TTA TTC] set R(98) [list AAG AAT ACG ATC CAG CCG CGA CGC CTG CTT GAA GAT GCA GCC GTA GTG TCA TCT TTA TTG] set R(99) [list AGT ATG CAA CAT CCA CGA CGC CTA CTC GAC GAT GCA GGA GGT GTA GTC TAC TCT TGG TTA] set R(100) [list AAT ACC ACG AGA AGT ATA ATC CCA CCG CCT CGA CTA CTG CTT GGA GGC GGT GTG GTT TTA] set R(101) [list AAC AAT ATA ATG CAA CCA CCG CCT CGC CGG GAA GAT GGC GGT GTG TAC TCA TGT TTC TTG] set R(102) [list ACA ACC AGA ATA ATG ATT CAC CAG CTA CTC CTT GAG GAT GCC GCG GGC GTG TCG TGT TTA] set R(103) [list AAC AAT ACC ACT AGA ATC CAC CCG CGA CGG CTG CTT GCA GGA GGC GTA GTT TAT TGT TTG] set R(104) [list AAC ACC ACG AGT ATC ATG ATT CAT CCG CTC GAC GAG GAT GCA GGT GTA GTC TAC TCG TTG] set R(105) [list AAG AAT ACC AGA AGT ATA CAC CAG CAT CGG CTA CTC GCC GCG GTA GTG GTT TCC TGT TTG] set R(106) [list AAC AAT ACC AGC ATA CAA CGA CGG CTT GAC GAG GCT GGC GGT GTC TAT TCA TCG TCT TTG] set R(107) [list ACA AGC ATA ATG ATT CAG CCA CGA CGC CGG CGT CTA CTT GAA GGA GGT TCA TCG TCT TGT] set R(108) [list AAG AGC AGT ATA ATG CAC CAG CCA CGA CGC CGG CTT GCT GGA GTC TAT TCA TGC TGT TTA] set R(109) [list AAC AAT ACG ACT ATC CAA CAG CTC CTT GAA GAG GAT GCG GCT GGC GGT GTT TAT TCC TGC] set R(110) [list AAC ACA AGC AGG AGT ATA ATT CCA CCT CTC GAG GCG GCT GGA GGC TCA TCT TGC TGT TTA] #rR 20180428 I add the lsort -dictionary set ListOfIndex [lsort -dictionary [array names R]] set R(MFC) [YCodeFor] if {$Qui=="MFC"} { return $R($Qui) } set R(TOP20) [YCodeFor "" TOP20] if {$Qui=="TOP20"} { return $R($Qui) } set R(111) [list ATC ACC CTC GAC GAG GAT GCC GGC GGT GTC] foreach I [C216 ListOf Index] { set K [expr $I + 1000] set R($K) [C216 $I] } #rR Use setgscoperr MAMAxy if you want xy to be the default circode if { ! 
[info exists DefaultCirCode]} { set DefaultCirCode 00 foreach CC $ListOfIndex { if {[OnTraite "MAMA$CC"]} { set DefaultCirCode $CC } if {[OnTraite "MUMU$CC"]} { set DefaultCirCode $CC } } } #rR You can see the default CirCode with CirCode DefaultCirCode if {$Qui=="DEFAULTCIRCODE"} { if {$Quoi=="MFC"} { set DefaultCirCode "MFC" set R($DefaultCirCode) [YCodeFor] IsX0 Reset return $DefaultCirCode } if {$Quoi=="TOP20"} { set DefaultCirCode "TOP20" set R($DefaultCirCode) [YCodeFor "" TOP20] IsX0 Reset return $DefaultCirCode } #rR .. and even set it to xy with CirCode DefaultCirCode xy if {[regexp {^[0-9]+$} $Quoi]} { set DefaultCirCode $Quoi IsX0 Reset } return $DefaultCirCode } if {[regexp -nocase "DEFAULTCIRCODE" $Qui]} { return $DefaultCirCode } if {$Qui==""} { set Qui $DefaultCirCode } if {$Qui eq "LISTOF" && $Quoi eq "INDEX"} { return $ListOfIndex } if {$Qui=="MFC"} { return $R($Qui) } if {$Qui=="TOP20"} { return $R($Qui) } if {[regexp {[A-Z]} $Qui]} { foreach {K V} [array get R] { foreach Codon $V { lappend Codes($Codon) $K lappend LesCodons $Codon set AA [AAduCodon $Codon] lappend Codes($AA) $K } } set LesCodons [lsort -unique $LesCodons] set LesAAs {} foreach Codon $LesCodons { lappend LesAAs [AAduCodon $Codon] } if {$Qui eq "LISTOF" && $Quoi eq "CODON"} { return $LesCodons } if {$Qui eq "LISTOF" && $Quoi eq "AA"} { return $LesAAs } if {$Qui eq "LISTOF" && $Quoi eq "AA3"} { return [join $LesAAs " "] } if {0 && [info exists Codes($Qui)]} { return $Codes($Qui) } return "" } if { ! 
[info exist R($Qui)]} { return "" } if {[string index $Quoi 0]=="P"} { set LesP {} foreach Codon $R($Qui) { lappend LesP [AAduCodon $Codon] } if {[regexp 3 $Quoi]} { return [join $LesP " "] } return $LesP } if {[info exists R($Qui)]} { return $R($Qui) } #rR I Add this to get the 216 codes if {$Qui>1000} { set I [expr $Qui-1000] set X [C216 $I] return $X } return "" } proc CirCodeProt {{GetWhat ""}} { if {$GetWhat==""} { set GetWhat "GetWithPWithX" } set LesLignes {} if {[regexp "P" $GetWhat]} { foreach I [CirCode ListOf Index] { set LaLigne [list $I] foreach P [CirCode $I P] { lappend LaLigne $P } set Ligne [join $LaLigne " "] lappend LesLignes $Ligne } } if {[regexp "P" $GetWhat] && [regexp "X" $GetWhat]} { lappend LesLignes "" lappend LesLignes "" } if {[regexp "X" $GetWhat]} { set LesAAs [list A C D E F G H I K L M N P Q R S T V W Y] lappend LesLignes " [join $LesAAs { }]" foreach I [CirCode ListOf Index] { set LesAAsPresents [lsort -unique [CirCode $I P]] set LaLigne [list $I] foreach A $LesAAs { if {[lsearch -sorted $LesAAsPresents $A]>=0} { lappend LaLigne "X" } else { lappend LaLigne " " } } set Ligne [join $LaLigne " "] lappend LesLignes $Ligne } } return [join $LesLignes "\n"] } proc RenameMAMA {} { #rR ne sert plus .. 
c'était pour renommer MAMF en MAMA exit NousAllonsAuBoulot [RepertoireDuGenome] set FilesOnly {} lappend FilesOnly "*_SumOfPairs" lappend FilesOnly "*_SumOfPairsForCodons" lappend FilesOnly "X*MotifsFeatures" #lappend FilesOnly "nuctfa" #lappend FilesOnly "prottfa" #lappend FilesOnly "nucalitfa" #lappend FilesOnly "protalitfa" #lappend FilesOnly "msfNuc" #lappend FilesOnly "msfNuc3" #lappend FilesOnly "msfProt" #lappend FilesOnly "msfProt3" foreach D $FilesOnly { set LesR [glob -type d $D] foreach R $LesR { Espionne $R NousAllonsAuBoulot $R exec renommeRR MAMF MAMA OnRevientDuBoulot } } OnRevientDuBoulot return set FilesContent {} lappend FilesContent "fiches/bornesdespabs" NousAllonsAuBoulot "[RepertoireDuGenome]/macsimXml" foreach Nom [ListeDesPABs] { regsub MAMF $Nom MAMA New file rename $Nom $New exec renommeRR MAMF MAMA } OnRevientDuBoulot } proc CirCodeInfo {{Qui ""} {Quoi ""}} { return "Wrong" #rR Find here all what could be available about CirCode global CirCodeInfo if {[info exists CirCodeInfo($Qui,$Quoi)]} { return $CirCodeInfo($Qui,$Quoi) } if {[info exists CirCodeInfo("EstCharge")]} { if {[CirCode $Qui $Quoi]!=""} { return [CirCode $Qui $Quoi] } return "" } set CirCodeInfo("EstCharge") 1 set CirCodeInfo(P,) [CirCodeProt] return [CirCodeInfo $Qui $Quoi] }