#rR gscope_pampas.tcl
#rR taken from grosjean/gscopublic
#
# NOTE(review): this file reached review whitespace-collapsed and passed through
# a markup stripper: text between '<' and '>' was removed and entities decoded.
# Several string literals that obviously held XML/SVG/HTML are therefore empty
# or truncated. They are left as found and flagged with NOTE(review); restore
# them from version control before relying on the affected procs.

# Rewrites the macsimXml file of every PAB from its "macsimXmlFAUX" copy,
# substituting the PAB name, then terminates the interpreter (exit).
# NOTE(review): both regsub patterns look truncated by the markup stripping
# (presumably they targeted a tagged name element) - confirm against VCS.
proc CorrigeMacsimsForPampas {} {
    foreach Nom [ListeDesPABs] {
        set M [ContenuDuFichier [GscopeFile $Nom "macsimXmlFAUX"]]
        set N $M
        regsub {[^<]+} $N "$Nom" N
        regsub {[^<]+} $N "$Nom" N
        Espionne [Sauve $N dans [GscopeFile $Nom "macsimXml"]]
    }
    exit
}

# Former (pre 2020-05-18) importer: for each file of Dir named like BBS<n>_...,
# renames the first sequence to <PreFixe><n> and converts the alignment with
# clustalw into the project's msf and macsimXml files.
#   Dir     directory holding the FASTA or MSF alignments
# Returns the list of original accessions found (empty list if none).
proc CreatePampasMsfFromExistingTfaOrMsfAVANT_20200518 {Dir} {
    file mkdir "[RepertoireDuGenome]/msf"
    file mkdir "[RepertoireDuGenome]/macsimXml"
    set TmpFile [TmpFile]
    Espionne $TmpFile
    # Initialised so that an empty or fully rejected directory returns {}
    # instead of raising "no such variable".
    set LesAccess {}
    foreach File [lsort -dictionary [glob -nocomplain "$Dir/*"]] {
        Espionne $File
        set Queue [file tail $File]
        if { ![regexp {BBS([0-9]+)_} $Queue Match I] } {
            FaireLire "Ca merde avec \n$File"
            # Without a number there is no valid name: skip the file rather
            # than reuse the previous (or an undefined) $I.
            continue
        }
        set Nom [format "%s%2.2d" [PreFixe] $I]
        set Txt [ContenuDuFichier $File]
        set Access ""
        if {[regexp {^>([^ \n]+)( |\n)} $Txt Match Access]} {
            # FASTA: only the very first header is renamed.
            regsub "^>$Access" $Txt ">$Nom" Txt
        } elseif {[regexp {Name: ([^ ]+) } $Txt Match Access]} {
            # MSF: rename the first "Name:" entry and every ">access".
            regsub "Name: $Access" $Txt "Name: $Nom" Txt
            regsub -all ">$Access" $Txt ">$Nom" Txt
        }
        if {$Access==""} { FaireLire "Ca merde pour le contenu de \n$File"; continue }
        lappend LesAccess $Access
        Sauve $Txt dans $TmpFile
        set FichierMsf [GscopeFile $Nom msf]
        set FichierXml [GscopeFile $Nom macsimXml]
        exec clustalw $TmpFile -convert -output=gscope -outfile=$FichierMsf
        exec clustalw $FichierMsf -convert -output=macsim -outfile=$FichierXml
    }
    file delete $TmpFile
    return $LesAccess
}

# Stores, for every PAB, the result of [GeneQuid UniprotData <access>] in the
# project's uniprot directory. The access is the third blank-separated word of
# the prottfa header. Returns "done".
proc PPUniprotPourTous {} {
    file mkdir "[RepertoireDuGenome]/uniprot"
    foreach Nom [ListeDesPABs] {
        set Entete [EnteteDuFichierTFA [GscopeFile $Nom prottfa]]
        lassign [split $Entete " "] N BId Access
        set Uniprot [GeneQuid UniprotData $Access]
        Sauve $Uniprot dans [GscopeFile $Nom uniprot]
    }
    return done
}

# Records AC/ID/GN/DE (and ValiGN when a gene name exists) in the info file of
# every PAB, using the fields returned by GeneQuid for the prottfa access.
# Entries whose returned AC line does not match the access are collected in
# fiches/LesLouches.txt. Returns the result of SauveLesLignes for that file.
proc PPInformePourTous {} {
    file mkdir "[RepertoireDuGenome]/infos"
    set LesLouches {}
    foreach Nom [ListeDesPABs] {
        set Entete [EnteteDuFichierTFA [GscopeFile $Nom prottfa]]
        lassign [split $Entete " "] N BId Access
        lassign [split [GeneQuid UniprotData $Access "QY,AC,ID,GN,DE"] "\n"] Q A I G D
        if { ! [regexp $Access $A]} { lappend LesLouches "$Access $A" }
        # ID: keep the first word only.
        regsub { .+$} $I "" I
        # GN: keep what follows the last "=", then its first word.
        set iEgal [string last "=" $G]
        if {$iEgal>=0} { set G [string range $G $iEgal+1 end] }
        scan $G "%s" G
        # DE: keep what follows the last "=", drop a trailing " {...".
        set iEgal [string last "=" $D]
        if {$iEgal>=0} { set D [string range $D $iEgal+1 end] }
        regsub { \{.+$} $D "" D
        Espionne ******************* $Nom $Access $I $G $D
        InformeSansDemander $Nom "AC: $Access"
        InformeSansDemander $Nom "ID: $I"
        InformeSansDemander $Nom "GN: $G"
        InformeSansDemander $Nom "DE: $D"
        if {$G!=""} { InformeSansDemander $Nom "ValiGN: $G" }
    }
    set FicLouches [SauveLesLignes $LesLouches dans "[RepertoireDuGenome]/fiches/LesLouches.txt"]
    return $FicLouches
}

# Last step of building a Pampas project from existing alignments.
proc CreatePampasDbFromGscopeProject {} {
    #rR Warning: this is the last step.
    #rR Beforehand you must have done
    #rR    mkdir /genomics/link/TRUC
    #rR    mkdir /genomics/link/TRUC/DeKirsley
    #rR    copied the alignments there
    #rR    setgscoperr Zero
    #rR    CreatePampasMsfFromExistingTfaOrMsf /genomics/link/TRUC TR
    PPInformePourTous
    PPUniprotPourTous
    CreatePampasDb
    return "Ok"
}

# Builds the skeleton of a Gscope project in Dir from the aligned FASTA (or
# MSF) files found in Dir/DeKirsley.
#   Dir                 project directory
#   Prefixe             PAB name prefix (default: tail of Dir)
#   FormatOrfNumbering  format of the number (default: zero padded, width
#                       derived from the number of files, at least 2)
# Returns a message with the next commands to run, or "" if the user declines.
proc CreatePampasMsfFromExistingTfaOrMsf {Dir {Prefixe ""} {FormatOrfNumbering ""}} {
    #rR Kirsley provides aligned fasta files; the first one is always Homo sapiens
    foreach Sub {beton fiches fasta prottfa msf macsimXml} {
        file mkdir "$Dir/$Sub"
    }
    set LesTfa [lsort -dictionary [glob -nocomplain "$Dir/DeKirsley/*.tfa"]]
    set N [llength $LesTfa]
    if {$N<9} { set LesTfa [lsort -dictionary [glob -nocomplain "$Dir/DeKirsley/*.fasta"]] }
    set N [llength $LesTfa]
    if { ! [OuiOuNon "Je vais traiter $N fasta"]} { return "" }
    if {$Prefixe==""} { set Prefixe [file tail $Dir] }
    if {$FormatOrfNumbering==""} {
        # Same widths as the former <100 / <1000 / ... ladder, but also
        # defined for 100000 files or more.
        set Width [expr {max(2, [string length $N])}]
        set FormatOrfNumbering "%${Width}.${Width}d"
    }
    set I 0
    set LesNoms {}
    set TmpFile [TmpFile]
    foreach File $LesTfa {
        incr I
        Espionne $File
        set Nom [format "%s$FormatOrfNumbering" $Prefixe $I]
        set Txt [ContenuDuFichier $File]
        set AccessOriginal ""
        if {[regexp {^>([^ \n]+)( |\n)} $Txt Match AccessOriginal]} {
            # Already FASTA: used as is.
        } elseif {[regexp {Name: ([^ ]+) } $Txt Match AccessOriginal]} {
            # MSF: convert to FASTA first.
            exec clustalw $File -convert -output=FASTA -outfile=$TmpFile
            set Txt [ContenuDuFichier $TmpFile]
        }
        if {$AccessOriginal==""} { FaireLire "Ca merde pour le contenu de \n$File"; continue }
        # Registered only once the file is known to be usable; otherwise the
        # second loop would fail on a name that has no sequences.
        lappend LesNoms $Nom
        set LeNew {}
        if {[info exists DejaVu]} { unset DejaVu }
        foreach Ligne [split $Txt "\n"] {
            if { ! [regexp "^>" $Ligne]} { lappend LeNew $Ligne; continue }
            lassign [BIdAccessFromEntete $Ligne] BId Access
            if {[info exists DejaVu($BId)]} {
                # Duplicate identifier inside one alignment: suffix it.
                incr DejaVu($BId)
                append BId "_$DejaVu($BId)"
                Espionne "DejaVu $BId"
            } else {
                set DejaVu($BId) 1
            }
            # The first sequence of the file is the reference of this PAB.
            if { ! [info exists AccessDe($Nom)]} { set AccessDe($Nom) $BId }
            set Reste ""
            regexp {^[^ ]+ (.+)$} $Ligne Match Reste
            set Entete ">$BId $Access"
            if {$Reste!=""} { append Entete " $Reste" }
            lappend LeNew $Entete
        }
        set LeTfasDe($Nom) $LeNew
    }
    file delete $TmpFile
    foreach Nom $LesNoms {
        set LeNew $LeTfasDe($Nom)
        set Access $AccessDe($Nom)
        set FichierTfa  "$Dir/prottfa/$Nom"
        set FichierTfas "$Dir/fasta/$Nom"
        set FichierMsf  "$Dir/msf/$Nom"
        set FichierXml  "$Dir/macsimXml/$Nom"
        SauveLesLignes $LeNew dans $FichierTfas
        exec clustalw $FichierTfas -convert -output=gscope -outfile=$FichierMsf
        exec clustalw $FichierMsf -convert -output=macsim -outfile=$FichierXml
        # prottfa = ungapped, upper-case reference sequence.
        set TfaRef [LaSequenceDuTFAs $FichierTfas $Access]
        set EntRef [EnteteDuTexteTFA $TfaRef]
        set SeqGap [string toupper [QueLaSequenceDuTexteTFA $TfaRef]]
        regsub -all {[^A-Z]} $SeqGap "" Seq
        regsub {^>} $EntRef ">$Nom " Entete
        Sauve [SequenceFormatTFA $Seq $Entete] dans $FichierTfa
    }
    set Premier 1
    set Dernier $I
    set FNum $FormatOrfNumbering
    set RepGen $Dir
    CreeBornesDesPABsTroisGradins $Premier $Dernier $Prefixe "" "" $FNum $RepGen
    CreeFichierMiniConfig $Prefixe $FNum "Prot" "" "" "" $RepGen
    AppendAuFichier "$RepGen/beton/miniconfig" "OnTraiteLike Pampas"
    set LeMessage {}
    lappend LeMessage "Attention maintenant"
    lappend LeMessage "setgscoperr [file tail $Dir]"
    lappend LeMessage "gscope puts CreatePampasDbFromGscopeProject"
    return [join $LeMessage "\n"]
}

# Splits the first word of a FASTA header into {BId Access}.
#   ">A_B_O ..."  gives {B_O A}
#   anything else gives {<first word without '>'> "."}
proc BIdAccessFromEntete Entete {
    scan $Entete "%s" Entete
    regsub ">" $Entete "" Access
    if { ! [regexp {^([^_]+)_([^_]+)_([^_]+)$} $Access Match A B O]} {
        return [list $Access "."]
    }
    set BId ${B}_$O
    return [list $BId $A]
}

# Returns (and stores in the global PampasServerDir) the server directory.
proc PampasServerDir {} {
    #rR Warning: also exists in pampasserver/server.tcl and gscope/gscope_pampas.tcl
    global PampasServerDir
    set PampasServerDir "/home/ripp/pampasserver"
    return $PampasServerDir
}

# Cached accessor to the Pampas database. The first call loads the protein and
# mutation tables into the global array PPDB; the three arguments are
# upper-cased before lookup. Unknown keys return "". With no argument the help
# is returned: it is read at run time from the marked comment lines below
# through [info body PPDB], so those lines are data and must stay in the body.
proc PPDB {{Qui ""} {Quoi ""} {Quid ""}} {
    global PPDB
    #rR PPDB Pampas DataBase
    #rR PPDB get this help
    #rR PPDB Pro ListOf Pk lists all Pk of proteins
    #rR PPDB Pro ListOf GscopeId GscopeIds
    #rR PPDB Pro ListOf Access Access
    #rR PPDB Pro ListOf Definition Definition
    #rR PPDB Pro ListOf W Wildtype Amino Acids (ex. R)
    #rR PPDB Pro ListOf Pos Position
    #rR PPDB Pro ListOf M Mutated Amino Acids (ex. YW)
    #rR PPDB Pro ListOf Code R34YW
    #rR PPDB Pro ListOf Code3 Arg34TyrTrp
    #rR
    #rR for all xxx as Pk GscopeId Access Definition
    #rR PPDB Pro xxx ListOfMutPk lists all Pk of mutations for the specified protein xxx
    #rR PPDB Pro xxx ListOfMutCode R34YW A55G ...
    #rR PPDB Pro xxx ListOfMutCode3 Arg34TyrTrp Ala55Gly ...
    #rR PPDB Pro xxx ListOfMutW R A
    #rR PPDB Pro xxx ListOfMutPos 34 55
    #rR PPDB Pro xxx ListOfMutM YW G
    #rR PPDB Pro xxx ListOfMutDbSnp rs123456 rs123457
    #rR PPDB Pro xxx Info gets the Info (it uses ExtraitInfo xxx )
    #rR PPDB Pro xxx ID: ID (it uses ExtraitInfo xxx ID)
    #rR PPDB Pro xxx ValiGN: ValiGN (it uses ExtraitInfo xxx ValiGN:)
    #rR etc
    #rR
    #rR for PkM (Pk of a mutation)
    #rR PPDB Mut PkM Pk get the Pk from the Pk
    #rR PPDB Mut PkM W R
    #rR PPDB Mut PkM M YW
    #rR PPDB Mut PkM Pos 34
    #rR PPDB Mut PkM DbSnp rs123456
    #rR PPDB Mut PkM PKP Pk of the corresponding protein
    #rR Fin Help
    set Qui  [string toupper $Qui]
    set Quoi [string toupper $Quoi]
    set Quid [string toupper $Quid]
    if {$Qui=="" && $Quoi=="" && $Quid==""} { set Qui "HELP" }
    if {[info exists PPDB($Qui,$Quoi,$Quid)]} { return $PPDB($Qui,$Quoi,$Quid) }
    if {[info exists PPDB("EstCharge")]} {
        # Indirect lookup: Quoi may be any alias of a protein.
        if {[info exists PPDB($Qui,$Quoi,PK)]} {
            set Pk $PPDB($Qui,$Quoi,PK)
            if {[info exists PPDB($Qui,$Pk,$Quid)]} { return $PPDB($Qui,$Pk,$Quid) }
        }
        # Otherwise fall back on the info file of the protein.
        if {[info exists PPDB($Qui,$Quoi,GSCOPEID)]} {
            set Nom $PPDB($Qui,$Quoi,GSCOPEID)
            if {$Quid=="INFO"} {
                set Info [ExtraitInfo $Nom]
            } else {
                set Info [ExtraitInfo $Nom $Quid]
            }
            if {$Info!=""} { return $Info }
        }
        return ""
    }
    set PPDB("EstCharge") 1

    set LeHelp {}
    foreach Ligne [split [info body PPDB] "\n"] {
        if { ! [regexp "#rR" $Ligne]} { continue }
        if {[regexp "Fin Help" $Ligne]} { break }
        regsub "#rR " [string trim $Ligne] "" Message
        lappend LeHelp $Message
    }
    set PPDB(HELP,,) [join $LeHelp "\n"]

    set Handle [CanalSqlPampas]
    set PPDB(HANDLE,,) $Handle
    foreach {Pk GscopeId Access Definition} [SqlExec "select pk_protein,gscopeid,access,definition from protein order by gscopeid" "GetList"] {
        # Queries are upper-cased before lookup, so every alias used as a key
        # is stored upper-cased too; the stored values keep their case.
        foreach Alias [list $Pk $GscopeId $Access $Definition] {
            set K [string toupper $Alias]
            set PPDB(PRO,$K,PK)       $Pk
            set PPDB(PRO,$K,GSCOPEID) $GscopeId
        }
        set PPDB(PRO,$Pk,ACCESS)     $Access
        set PPDB(PRO,$Pk,DEFINITION) $Definition
        lappend PPDB(PRO,LISTOF,PK)         $Pk
        lappend PPDB(PRO,LISTOF,GSCOPEID)   $GscopeId
        lappend PPDB(PRO,LISTOF,ACCESS)     $Access
        lappend PPDB(PRO,LISTOF,DEFINITION) $Definition
    }
    foreach K [list PK GSCOPEID ACCESS DEFINITION] {
        # An empty protein table leaves the lists undefined.
        if { ! [info exists PPDB(PRO,LISTOF,$K)]} { set PPDB(PRO,LISTOF,$K) {} }
        set PPDB(PRO,LISTOF,$K) [lsort $PPDB(PRO,LISTOF,$K)]
    }
    foreach {PkM W Pos M DbSnp PkP} [SqlExec "select pk_mutation,w,pos,m,DbSnp,pk_protein from mutation order by pk_mutation" "GetList"] {
        set PPDB(MUT,$PkM,PK)    $PkM
        set PPDB(MUT,$PkM,W)     $W
        set PPDB(MUT,$PkM,M)     $M
        set PPDB(MUT,$PkM,POS)   $Pos
        set PPDB(MUT,$PkM,DBSNP) $DbSnp
        # PKP is the pk of the owning protein (was wrongly the mutation pk).
        set PPDB(MUT,$PkM,PKP)   $PkP
        # Three-letter forms, one residue at a time.
        set W3 ""
        foreach A [split $W ""] { append W3 [string totitle [UT $A]] }
        set M3 ""
        foreach A [split $M ""] { append M3 [string totitle [UT $A]] }
        set Code  $W$Pos$M
        set Code3 $W3$Pos$M3
        set PPDB(MUT,$PkM,CODE)  $Code
        set PPDB(MUT,$PkM,CODE3) $Code3
        set PPDB(MUT,$PkM,) [list $PkM $W $M $Pos $Code $Code3 $DbSnp]
        lappend PPDB(PRO,$PkP,LISTOFMUTPK)    $PkM
        lappend PPDB(PRO,$PkP,LISTOFMUTCODE)  $Code
        lappend PPDB(PRO,$PkP,LISTOFMUTCODE3) $Code3
        lappend PPDB(PRO,$PkP,LISTOFMUTW)     $W
        lappend PPDB(PRO,$PkP,LISTOFMUTM)     $M
        lappend PPDB(PRO,$PkP,LISTOFMUTPOS)   $Pos
        lappend PPDB(PRO,$PkP,LISTOFMUTDBSNP) $DbSnp
        lappend PPDB(PRO,LISTOF,W)     $W
        # Mutated residues (was wrongly filled with the wild type).
        lappend PPDB(PRO,LISTOF,M)     $M
        lappend PPDB(PRO,LISTOF,POS)   $Pos
        lappend PPDB(PRO,LISTOF,CODE)  $Code
        lappend PPDB(PRO,LISTOF,CODE3) $Code3
        lappend PPDB(PRO,LISTOF,DBSNP) $DbSnp
    }
    return [PPDB $Qui $Quoi $Quid]
}

catch { package require sqlite3 }

# Evaluates Question as a Tcl script and returns its result UTF-8 encoded.
#   Format "List"  a valid list result is joined with newlines
#   Format "HTML"  '<' and '>' are replaced by the entities &lt; and &gt;
# SECURITY NOTE(review): Question is passed to eval unchanged. If it can come
# from a remote client this is arbitrary code execution; restrict the callers
# or whitelist the commands.
proc QaG {Question {Format ""}} {
    set Reponse [eval $Question]
    if {$Format=="List" && [string is list $Reponse]} { set Reponse [join $Reponse "\n"] }
    if {$Format=="HTML"} {
        # The replacements had degraded to {\<} and {\>} (entities decoded by
        # the markup stripping), which inserted a backslash instead of escaping.
        regsub -all "<" $Reponse {\&lt;} Reponse
        regsub -all ">" $Reponse {\&gt;} Reponse
    }
    set Reponse [encoding convertto "utf-8" $Reponse]
    return $Reponse
}

# Maps each blank-separated identifier of Proteins through IDMapping, trying
# ACC+ID, then GENECARDS_ID, then P_REFSEQ_AC (target ACC each time). An
# identifier is replaced whenever the mapping at the same index is not empty;
# each pass works on the result of the previous one. Returns the final list.
proc FoundProteinName Proteins {
    set LesNoms [split $Proteins " "]
    foreach Source {ACC+ID GENECARDS_ID P_REFSEQ_AC} {
        set LesMappes [IDMapping $Source ACC $LesNoms]
        set LesNouveaux {}
        set i 0
        foreach Ancien $LesNoms {
            set Mappe [lindex $LesMappes $i]
            if {$Mappe != ""} {
                lappend LesNouveaux $Mappe
            } else {
                lappend LesNouveaux $Ancien
            }
            incr i
        }
        set LesNoms $LesNouveaux
    }
    return $LesNoms
}

# Summarises [Inventaire GetText] as "<n>/<total> <Label>" items joined with
# ",", for blastp, macsimXml, aliinout and structure. A missing count gives 0.
proc checkInventory {} {
    set textInventory [Inventaire GetText]
    set nbProt [lindex [regexp -inline {([0-9]*)[A-Z a-z " "]*are available} $textInventory] 1]
    set listInventory [list]
    foreach {Keyword Label} {blastp Blastp macsimXml Macsims aliinout Align structure Structure} {
        set Pattern [format {([0-9]*)[" ]*%s} $Keyword]
        set str [lindex [regexp -inline $Pattern $textInventory] 1]
        if {$str != ""} {
            lappend listInventory "$str/$nbProt $Label "
        } elseif {$Label == "Structure"} {
            # Historical output: this single item has no trailing blank.
            lappend listInventory "0/$nbProt $Label"
        } else {
            lappend listInventory "0/$nbProt $Label "
        }
    }
    return [join $listInventory ","]
}

# Returns the part of the blastp output of Protein that precedes the first '>'
# (header and hit list); blank lines after "Sequences producing" are dropped.
# NOTE(review): linkUniprot and linkPDB are computed but no longer used, the
# lreplace puts back the same element and the regsub replacement is "&0" - the
# HTML anchors these lines built were evidently stripped. Restore from VCS.
proc catchBlast {Protein} {
    set response [GscopeFileContent $Protein blastp]
    set response [lindex [split $response ">"] 0]
    set response [split $response "\n"]
    set listBlast ""
    set i 0
    set OnAttendSequencesProducing 1
    foreach line $response {
        if { $OnAttendSequencesProducing } {
            # Header part: copied verbatim up to "Sequences producing".
            lappend listBlast $line
            if { ! [regexp "Sequences producing" $line] } { continue }
            set OnAttendSequencesProducing 0
            lappend listBlast ""
            continue
        }
        if {[string trim $line]==""} { continue }
        if {[regexp {^>} $line]} { break }
        set cutLine [split $line "|"]
        if {[llength $cutLine] > 1} {
            set linkUniprot "http://www.uniprot.org/uniprot/[lindex $cutLine 1].txt"
            set cutLine [lreplace $cutLine 1 1 "[lindex $cutLine 1]"]
            lappend listBlast "[join $cutLine |]"
            continue
        }
        if {[regexp -nocase {^PDB[\:_]([0-9][0-9a-z]+)([^ ]+)( |$)} $line match accessPDB chaine]} {
            set linkPDB "https://www.rcsb.org/pdb/explore/explore.do?structureId=$accessPDB"
            regsub {^PDB[:_][^ ]+( |$)} $line "&0" line
            lappend listBlast "$line"
            continue
        }
        lappend listBlast $line
    }
    return [join $listBlast "\n"]
}

# Returns, joined with ",", the names of the projects under /genomics/link
# whose beton/miniconfig contains "OnTraiteLike Pampas".
proc ProjetsPampas {} {
    # grep exits with status 1 when nothing matches and may write to stderr
    # for unreadable files; exec raises in both cases. The captured text is
    # parsed anyway: lines that are not grep hits are filtered out below.
    catch {exec tcsh -c "grep \"OnTraiteLike Pampas\" /genomics/link/*/beton/miniconfig"} Sortie
    set nameList [list]
    foreach path [split $Sortie "\n"] {
        if { ! [regexp {/genomics/link/([^/]+)/beton/miniconfig} $path Match dir]} { continue }
        lappend nameList $dir
    }
    return [join $nameList ","]
}

# Replaces every run of characters other than letters, digits, '-', '_' and
# '.' by one blank and trims the result.
proc RemoveBadCharacters Proteins {
    set Proteins [regsub -all {[^A-Za-z0-9\-\_\.]+} $Proteins " "]
    set Proteins [string trim $Proteins]
    return $Proteins
}

# Extracts ID, first AC, first gene name and first description from a UniProt
# flat-file text. A text without newline is taken as an id/ac and fetched
# through GeneQuid. Returns {protein id ac gn de}.
proc CatchDataUniprot {texteEmbl protein} {
    #rR in case only the id or the ac is given ...
    if { ! [regexp "\n" $texteEmbl]} { set texteEmbl [GeneQuid UniprotData $texteEmbl] }
    set lignesEmbl [split $texteEmbl "\n"]
    set id ""
    set ac ""
    set de ""
    set gn ""
    foreach line $lignesEmbl {
        # Reset first: scan leaves k untouched on an empty line, so the key
        # of the previous line would be applied again (or k be undefined).
        set k ""
        scan $line "%s" k
        set info [string range $line 5 end]
        if {$k == "ID"} {
            regsub {[ ;].*} $info "" info
            set id $info
        }
        if {$k == "AC"} {
            regsub {[ ;].*} $info "" info
            set ac $info
        }
        if {$k == "GN" && $gn==""} {
            regsub {[ ;].*} $info "" info
            set gn $info
            regsub {^Name=} $gn "" gn
        }
        if {$k == "DE" && $de==""} {
            set de $info
            regsub {^RecName: } $de "" de
            regsub {^Full=} $de "" de
        }
    }
    return [list $protein $id $ac $gn $de]
}

# Resolves each identifier of the text to a UniProt entry, trying in turn the
# identifier itself, IdAcGnForList (Id then Ac) and the human (9606) gene name
# mapping. Returns one line per identifier: tab-separated
# "protein id ac gn de", "!protein" when nothing was found, and a line
# prefixed with "!!" when its id was already seen.
proc CheckProteinList textOfWantedProteins {
    set textOfWantedProteins [RemoveBadCharacters $textOfWantedProteins]
    set listOfWantedProteins [split $textOfWantedProteins " "]
    set ListOfListOfData {}
    foreach protein $listOfWantedProteins {
        set uniprot [GeneQuid UniprotData $protein]
        if {$uniprot == ""} {
            set id [lindex [split [IdAcGnForList $protein Id] ":"] 1]
            set uniprot [GeneQuid UniprotData $id]
            if {$uniprot == ""} {
                set uniprot [GeneQuid UniprotData [lindex [split [IdAcGnForList $id Ac] ":"] 1]]
            }
            if {$uniprot == ""} {
                set possibles [IDMapping_sql "GENE_NAME" "ACC+ID" $protein 9606]
                set fiche ""
                foreach possible [split $possibles " "] {
                    set fiche [GeneQuid UniprotData $possible]
                    if {$fiche!=""} { break }
                }
                if {$fiche!=""} { set uniprot $fiche }
            }
        }
        if {$uniprot == ""} {
            lappend ListOfListOfData [list "!$protein"]
        } else {
            lappend ListOfListOfData [CatchDataUniprot $uniprot $protein]
        }
    }
    set NewList {}
    foreach ListOfData $ListOfListOfData {
        set ID [string toupper [lindex $ListOfData 1]]
        set Ligne [join $ListOfData "\t"]
        if {[info exists DejaVu($ID)] && [string index $ListOfData 0] != "!"} {
            set Ligne "!!$Ligne"
        }
        set DejaVu($ID) 1
        lappend NewList $Ligne
    }
    return [join $NewList "\n"]
}

# Fills table Mutation from the "FT VARIANT" lines of the UniProt entry of
# every protein in db. A variant whose 5th word is "Missing" is stored without
# w and m.
#   db   sqlite3 database command
proc FillMutationTable {db} {
    set LesPkProtein [$db eval {SELECT pk_protein FROM protein order by pk_protein}]
    foreach Pk $LesPkProtein {
        set Access [$db eval {select access from protein where pk_protein=$Pk}]
        set content [GeneQuid UniprotData $Access]
        foreach ligne [split $content "\n"] {
            set LesMots [LesMotsDuTexte $ligne]
            lassign $LesMots K V position bidon w bidon2 m
            set DbSnp [StringApres "dbSNP:" dans $ligne]
            regsub {\).*$} $DbSnp "" DbSnp
            if {$K != "FT" || $V != "VARIANT"} { continue }
            # Variables inside the braces are bound by sqlite3, not
            # substituted into the SQL text.
            if {$w == "Missing"} {
                $db eval {INSERT INTO Mutation(pos, pk_protein, DbSnp) VALUES ($position, $Pk, $DbSnp)}
            } else {
                $db eval {INSERT INTO Mutation(w, pos, m, pk_protein, DbSnp) VALUES ($w, $position, $m, $Pk, $DbSnp)}
            }
        }
    }
}

# Creates the Pampas database (tables protein and mutation) for the current
# project and fills it. Returns the database file name.
proc CreatePampasDb {} {
    set FichierDb [PampasDb]
    # NOTE(review): the specification reached review on one line; it is
    # presumably line oriented and is laid out that way here. Check against
    # what FabriqueDeTables expects.
    set tbl "
        - protein {
        . pk_protein P
        . gscopeid V32
        . access V32
        . definition text
        }
        - mutation {
        . pk_mutation P
        . w V32
        . pos I
        . m V32
        . DbSnp V32
        . pk_protein I
        }
    "
    set lstNom [ListeDesPABs]
    set db [FabriqueDeTables $tbl "" $FichierDb]
    foreach Nom $lstNom {
        # Access: info "Access:", else info "AC:", else mapping of the id.
        set Access [ExtraitInfo $Nom "Access:"]
        if {$Access==""} { set Access [ExtraitInfo $Nom "AC:"] }
        if {$Access==""} {
            set Access [lindex [split [IDMapping_sql GENE_NAME ACC [IdDuNom $Nom]] " "] 0]
            set Access [IDMapping_sql ACC+ID ACC $Access]
        }
        set Def [Definition $Nom]
        $db eval {INSERT INTO protein(gscopeid, access, definition) VALUES ($Nom, $Access, $Def)}
    }
    FillMutationTable $db
    $db close
    return $FichierDb
}

# Manual check: shows the mutations joined to protein MG01.
proc TestCreatePampasDb {} {
    set db "db"
    sqlite3 $db [PampasDb]
    set toto [$db eval {SELECT * FROM Mutation JOIN protein ON protein.pk_protein = mutation.pk_protein WHERE protein.gscopeid = "MG01"}]
    EspionneL $toto
    $db close
}

# Returns the file name of the Pampas database of the current project.
proc PampasDb {} {
    set FichierSql "[RepertoireDuGenome]/fiches/pampas.sql"
    return $FichierSql
}

# Returns the pk of a protein given as a pk (digits only), a PAB name or an
# access.
proc PkProtein Protein {
    if {[regexp {^[0-9]+$} $Protein]} { return $Protein }
    # The value is embedded in the SQL text: double the single quotes so it
    # cannot terminate the literal.
    set Valeur [string map {' ''} $Protein]
    if {[EstUnPAB $Protein]} {
        set Colonne "gscopeId"
    } else {
        set Colonne "access"
    }
    set Pk [SqlExecForDatabase [PampasDb] "select pk_protein from protein where $Colonne='$Valeur'" "GetFirstValue"]
    return $Pk
}

# Returns the mutated positions of Protein in increasing order, or an empty
# list when the protein is unknown.
proc MutationListOfPosition {Protein} {
    set Pk [PkProtein $Protein]
    # An empty pk would produce the invalid statement "where pk_protein= order".
    if {$Pk == ""} { return {} }
    set LesPos [SqlExecForDatabase [PampasDb] "select pos from mutation where pk_protein=$Pk order by pos" "GetList"]
    return $LesPos
}

# Returns two parallel lists for the variants of Protein (a gscopeId), sorted
# by position: the "<w><pos><m>" codes and the "p.<UT w><pos><UT m>" codes.
proc selectVariants {Protein} {
    sqlite3 db1 [PampasDb]
    set lst1 [list]
    set lst2 [list]
    db1 eval {SELECT w, pos, m FROM Mutation JOIN Protein ON Protein.pk_protein=Mutation.pk_protein WHERE Protein.gscopeId=$Protein order by pos} {
        lappend lst1 "$w$pos$m"
        lappend lst2 "p.[UT $w]$pos[UT $m]"
    }
    db1 close
    return [list $lst1 $lst2]
}

# Returns the formatted variant lines of Protein, then ";", then the UniProt
# text of its access, combined with concat.
# NOTE(review): Db is a plain copy of DbSnp; the markup that presumably
# wrapped it was stripped.
proc setVariantsAndUniprotFiche {Protein} {
    sqlite3 db1 [PampasDb]
    set str1 ""
    set str2 ""
    set access [db1 eval {SELECT access FROM Protein WHERE gscopeId=$Protein}]
    set lst3 [list]
    db1 eval {SELECT w, pos, m , DbSnp FROM Mutation JOIN Protein ON Protein.pk_protein=Mutation.pk_protein WHERE Protein.gscopeId=$Protein order by pos} {
        set str1 "$w$pos$m"
        set str2 "p.[UT $w]$pos[UT $m]"
        if {$DbSnp != ""} { set Db "$DbSnp" } else { set Db "" }
        lappend lst3 [format "%6s %-10s %s" $str1 $str2 $Db]
    }
    set uniprot [GeneQuid UniprotData $access]
    db1 close
    return [concat [join $lst3 "\n"] ";" $uniprot]
}

# Builds the overview sent to the client: ';'-separated fields holding the PAB
# names, ids, mutation counts, sequence lengths, InfoPampas infos ('@'
# separated), the inventory, the svg texts ('@' separated) and the project
# infos. Creates the database first if its file is missing.
proc PampasInfoForAllProteins {} {
    if { ! [file exists [PampasDb]]} { CreatePampasDb }
    sqlite3 db1 [PampasDb]
    set LesNoms [ListeDesPABs]
    set lesGn {}
    set lesNbMutation {}
    set lesSeqLen {}
    set lesInfosPampas {}
    set LesSvg {}
    foreach nom $LesNoms {
        lappend lesGn [IdDuNom $nom]
        lappend lesNbMutation [db1 eval {SELECT COUNT(Mutation.pk_protein) FROM Mutation JOIN Protein ON Protein.pk_protein=Mutation.pk_protein WHERE Protein.gscopeId=$nom}]
        lappend lesSeqLen [string length [SequenceDuNom $nom]]
        lappend lesInfosPampas [ExtraitInfo $nom "InfoPampas"]
        set SvgFile [GscopeFile $nom svg]
        set Svg ""
        if {[file exists $SvgFile]} { set Svg [ContenuDuFichier $SvgFile] }
        lappend LesSvg $Svg
    }
    db1 close
    set reponse "[join $LesNoms { }];[join $lesGn { }];[join $lesNbMutation { }];[join $lesSeqLen { }];[join $lesInfosPampas @];[checkInventory];[join $LesSvg @];[PampasProjectInfo all]"
    Zpy $reponse
    return $reponse
}

##
# Sets the Rivet response headers for a JSON body and returns json unchanged.
proc sendJsonToBrowser {json} {
    #fconfigure stdout -encoding utf-8
    ::rivet::headers numeric 200
    ::rivet::headers type {application/json; charset=utf-8}
    ::rivet::headers add Content-Length [string bytelength $json]
    return $json
    # NOTE(review): never reached because it follows the return. Kept in
    # place since it is unclear whether logging was switched off on purpose.
    LogWscope $json
}

# Returns the sequence stored in the prottfa file of Nom.
proc SequenceDuNom Nom {
    set Seq [QueLaSequenceDuTFA [GscopeFile $Nom prottfa]]
    return $Seq
}

# Returns the first non-empty info of Nom among ID:, AC:, Alias:, ValiGN:,
# GN:, NarcisseID: and NarcisseAC:, or Nom itself.
proc IdDuNom Nom {
    foreach Cle {ID: AC: Alias: ValiGN: GN: NarcisseID: NarcisseAC:} {
        set Id [ExtraitInfo $Nom $Cle]
        if {$Id != ""} { return $Id }
    }
    return $Nom
}

# Manual check of the JSON round trip for MG01.
proc TestJ {} {
    set seq [QuestionDeScience MG "ret SequenceDuNom MG01"]
    set id  [QuestionDeScience MG "ret IdDuNom MG01"]
    set d [dict create id $id seq $seq]
    set j [::json::dict2json $d]
    LogWscope $j
    return $j
}

# Registers one mutation "lollipop" in the global array PampasLollipop and
# returns the lines built for it, joined with newlines.
#   Id "Reset"   clears the array and returns ""
#   Sens ""      query mode: returns PampasLollipop(Id,Pos) or ""
#   Sens + or -  side of the axis on which the lollipop is drawn
# NOTE(review): the four lines appended to Lol are empty or hold only the
# titles - the SVG elements they contained were stripped, which is also why
# RX and RY are now unused. Likewise the separator joining the titles is a
# plain newline here and may originally have been an entity.
# Compression must be a number when Sens is given (it is used as a divisor).
proc PampasLollipop {Id Pos {Sens ""} {Titre ""} {Couleur ""} {HoHeSp ""} {Compression ""}} {
    JeMeSignale
    global PampasLollipop
    if {$Id=="Reset"} {
        if {[info exists PampasLollipop]} { unset PampasLollipop }
        return ""
    }
    #rR the PampasLollipop query mode was grafted here for the arcs
    if {$Sens==""} {
        if {[info exists PampasLollipop($Id,$Pos)]} { return $PampasLollipop($Id,$Pos) }
        return ""
    }
    set Lol {}
    # Sens is substituted as the operator, hence the unbraced expressions.
    set Y1 [expr 50 $Sens 5]
    set OffsetY 20
    if {$HoHeSp=="domi"} { set OffsetY 25 }
    if {$HoHeSp=="homo"} { set OffsetY 30 }
    set Y2 [expr 50 $Sens $OffsetY]
    set RX [expr 3.0/$Compression]
    set RY 3
    set TitreCorrige $Titre
    set TitreCorrige [RajouteMut3LAuTitre $TitreCorrige]
    lappend PampasLollipop($Pos-$Sens,ListOfTitle) $TitreCorrige
    LogWscope "PampasLollipop $Id $Pos $Sens $Titre $Couleur $HoHeSp $Compression"
    set Titres [join $PampasLollipop($Pos-$Sens,ListOfTitle) "\n"]
    LogWscope "$Titres"
    lappend Lol ""
    lappend Lol ""
    lappend Lol " $Titres"
    lappend Lol ""
    #AppendAuFichier /home/ripp/msp/AllPampasLollipopTitlesAvant.txt "$Id $Titre"
    #AppendAuFichier /home/ripp/msp/AllPampasLollipopTitlesApres.txt "$Id $TitreCorrige"
    lappend PampasLollipop($Pos,ListOfId) $Id
    if {$HoHeSp!="homo" && $HoHeSp!="domi" } {
        lappend PampasLollipop(ListOf,Id) $Id
        set PampasLollipop($Id,Pos)         $Pos
        set PampasLollipop($Id,Sens)        $Sens
        set PampasLollipop($Id,Titre)       $Titre
        set PampasLollipop($Id,Couleur)     $Couleur
        set PampasLollipop($Id,HoHeSp)      $HoHeSp
        set PampasLollipop($Id,Compression) $Compression
        set PampasLollipop($Id,Y1)          $Y1
        set PampasLollipop($Id,Y2)          $Y2
        set PampasLollipop($Pos,Id)         $Id
    }
    return [join $Lol "\n"]
}

# Writes the svg file of every PAB (compression 0.2, colour names green1 and
# sea_green replaced by green). Returns the list of results of Sauve.
proc SvgWithFeaturesPourTous {} {
    set LesSvg {}
    foreach Nom [ListeDesPABs] {
        Espionne $Nom
        set Svg [SvgWithFeatures $Nom "" 0.2]
        regsub -all "green1" $Svg "green" Svg
        regsub -all "sea_green" $Svg "green" Svg
        lappend LesSvg [Sauve $Svg dans [GscopeFile $Nom svg]]
    }
    return $LesSvg
}

# Builds the svg drawing of a protein with its PFAM-A features and mutations.
#   Nom            any alias accepted by [PPDB Pro <alias> GscopeId]
#   aListeHoHeMut  optional name of a caller variable
#   aSvg           a number is taken as the compression factor; otherwise the
#                  optional name of a caller variable
#   aSvgPourAli    optional name of a caller variable
# NOTE(review): every literal appended to LeSvg lost its markup, and the end
# of the proc is missing (see below).
proc SvgWithFeatures {Nom {aListeHoHeMut ""} {aSvg ""} {aSvgPourAli ""}} {
    set Nom [PPDB Pro $Nom GscopeId]
    # Creation of the svg drawing with the features and the mutations
    file mkdir "[RepertoireDuGenome]/svg"
    set YaCompression 0
    set Compression 1.0
    if {[regexp {^[0-9\.]+$} $aSvg]} {
        set Compression $aSvg
        set aSvg ""
        set YaCompression 1
    }
    PampasLollipop "Reset" a b c
    if {$aListeHoHeMut!=""} { upvar $aListeHoHeMut ListeHoHeMut }
    if {$aSvg !=""} { upvar $aSvg svg }
    if {$aSvgPourAli !=""} { upvar $aSvgPourAli svgPourAli }
    set LeSvg {}
    set dominants {}
    set homozygous {}
    set heterozygous {}
    set special {}
    set ListeHoHeMut {}
    set mutpub {}
    set LesPkM [PPDB Pro $Nom ListOfMutPk]
    set LesPos {}
    foreach PkM $LesPkM {
        set Pos [PPDB Mut $PkM Pos]
        lappend LesPos $Pos
        lappend MutEnPos($Pos) [PPDB Mut $PkM]
    }
    if {[info exists DejaVu]} { unset DejaVu }
    set LesPos [lsort -integer $LesPos]
    # Alternate the side of consecutive lollipops.
    set Sens "+"
    set LesMutations {}
    foreach Pos $LesPos {
        foreach MeP $MutEnPos($Pos) {
            if {[info exists DejaVu($MeP)]} { continue }
            set DejaVu($MeP) 1
            if {$Sens=="+"} { set Sens "-" } else { set Sens "+" }
            set SensDe($MeP) $Sens
            lappend LesMutations $MeP
        }
    }
    ########################################################################################################
    set Acc $Nom
    set Sequence ""
    if {[OnTraite MSP]} { set Acc "[MSP Syn $Nom A]mito_Homo.sapi" }
    # Try the name, then the AC, then the ID, as sequence name in the macsim.
    if {$Sequence==""} { set Acc $Nom ; set Sequence [FromMacsim $Nom Sequences $Acc] }
    if {$Sequence==""} { set Acc [ExtraitInfo $Nom "AC:"] ; set Sequence [FromMacsim $Nom Sequences $Acc] }
    if {$Sequence==""} { set Acc [ExtraitInfo $Nom "ID:"] ; set Sequence [FromMacsim $Nom Sequences $Acc] }
    set LesFeatures [FromMacsim $Nom PFAM-A $Acc]
    regsub -all -nocase {[^a-z]} $Sequence "" Sequence
    set LongueurSequence [string length $Sequence]
    if {$Compression==1.0} {
        set SvgH 100
        set SvgW 100%
        set TransY 0
    } else {
        set SvgH 75
        set SvgW 200
        set TransY [expr -$SvgH*0.5*$Compression]
    }
    #set SvgW [expr $LongueurSequence*$Compression]
    lappend LeSvg ""
    lappend LeSvg ""
    lappend LeSvg ""
    foreach Mut $LesMutations {
        lassign $Mut K W M P C T D
        set Sens $SensDe($Mut)
        set Titre "$C $T $D"
        lappend LeSvg [PampasLollipop "Normal" $P $Sens $Titre "lightgreen" "normal" $Compression]
    }
    # A feature lying inside another one is drawn thinner.
    set LesSSi {}
    set iFeat 0
    foreach Feature $LesFeatures {
        incr iFeat
        lassign [split $Feature " "] coord cocal ftype FType fstart FStart fstop FStop fcolor FColor fscore FScore fnote FNote
        lappend LesSSi $FStart $FStop $iFeat
    }
    foreach {A B I} $LesSSi {
        foreach {a b j} $LesSSi {
            if {$I!=$j && $A<=$a && $b<=$B} { set Reduire($j) 1 }
        }
    }
    set iFeat 0
    foreach Feature $LesFeatures {
        # Same numbering as the first pass. The counter was never incremented
        # here, so Reduire() was always tested for index 0 and never applied.
        incr iFeat
        lassign [split $Feature " "] coord cocal ftype FType fstart FStart fstop FStop fcolor FColor fscore FScore fnote FNote
        set Color $FColor
        if {[regexp {^[0-9]+$} $FColor] } { set Color [FromMacsim MacsimsColor $FColor] }
        set LenRect [expr { $FStop-$FStart }]
        set Y 38
        set HY 24
        if {[info exists Reduire($iFeat)]} {
            set Y 42
            set HY 16
        }
        set Titre "$FType $FNote $FStart-$FStop"
        lappend LeSvg ""
        lappend LeSvg "$Titre"
        lappend LeSvg ""
    }
    lappend LeSvg ""
    lappend LeSvg ""
    set Svg [join $LeSvg "\n"]
    set SvgPourAli $Svg
    if {   [regexp -nocase {x1='([0-9]+)'} $Svg Match X1] \
        && [regexp -nocase {x2='([0-9]+)'} $Svg Match X2] \
        && [regexp -nocase {stroke-width='([0-9]+)'} $Svg Match S]} {
        set V1 [expr $X1 - $S/2]
        set V2 [expr $X2 + $S/2]
        set SvgPourAli $Svg
        set Scale [expr 1000./($X2+$X1)]
        #rR beware: this changes every id
        regsub -all {id='} $SvgPourAli "id='PourAli_" SvgPourAli
        regsub {>} $SvgPourAli " viewBox='0 0 1000 100'>" SvgPourAli
        regsub {scale\([^\)]+\)} $SvgPourAli "scale($Scale, 1.0)" SvgPourAli
        regsub {width='[0-9\.]+'} $SvgPourAli "width='100%'" SvgPourAli
        # NOTE(review): SOURCE IS CORRUPTED FROM HERE. The remaining text was
        #     regsub -all {=0} { set KeepExisting 1 }
        # i.e. everything between a '<' in this regsub pattern and the '>' of
        # a ">=0" test located in the next proc was stripped. The rest of this
        # proc (including whatever it assigned to the upvar'ed variables and
        # its return) is lost and must be restored from version control.
    }
    # NOTE(review): reconstructed - SvgWithFeaturesPourTous saves the returned
    # value as the svg text, so the drawing is returned.
    return $Svg
}

# Creates a new Gscope project /gstock/<ProjName> (linked from /genomics/link)
# from a tab-separated text holding one protein per line:
# YourId Id Ac Gn De Ec. Returns the project directory, or a text starting
# with "Error." on failure.
#
# NOTE(review): THE ORIGINAL HEADER AND FIRST STATEMENTS OF THIS PROC ARE LOST
# (see the note at the end of SvgWithFeatures). The parameter list below is
# rebuilt from the variables the body reads; its order and defaults are
# assumptions. KeepExisting was computed by a lost test ending in
# ">=0} { set KeepExisting 1 }" and is exposed here as a last parameter.
# Verify every caller before using this.
proc CreateGscopeProjectWithPampas {ProjName {TexteSource ""} {Prefixe ""} {Mail ""} {Description ""} {PublicPrivate ""} {KeepExisting 0}} {
    #rR TODO (still to be done) !!!!
    Zpy "dans CreateGscopeProjectWithPampas"
    Zpy $ProjName
    Zpy $TexteSource
    set RepGen "/gstock/$ProjName"
    set RepGenLink "/genomics/link/$ProjName"
    if {[file exists $RepGen] || [file exists $RepGenLink]} {
        if { ! [OuiOuNon "$RepGen or $RepGenLink already exists. Do I continue anyway ?" 0]} {
            return "Error. $RepGen already exists."
        }
    }
    if {[FileAbsent $RepGen]} { file mkdir $RepGen }
    if {[FileAbsent $RepGenLink]} { file link -symbolic $RepGenLink $RepGen }
    set DeDir "$RepGenLink/DeWeb"
    file mkdir $DeDir
    set FichierSource "$DeDir/InfosPampas.txt"
    if {$TexteSource!=""} { Sauve $TexteSource dans $FichierSource }
    if {[FileAbsent $FichierSource]} { return "Error. I could not find $FichierSource" }
    set LesInfosSource [LesLignesDuFichier $FichierSource]
    if {$Prefixe==""} { set Prefixe $ProjName }
    foreach Dir [list fiches protembl prottfa infos] { file mkdir "$RepGen/$Dir" }
    # Width of the PAB number.
    set NbLignes [llength $LesInfosSource]
    set N 2
    if {$NbLignes>99}   { set N 3 }
    if {$NbLignes>999}  { set N 4 }
    if {$NbLignes>9999} { set N 5 }
    # NOTE(review): no leading "%" here, whereas
    # CreatePampasMsfFromExistingTfaOrMsf passes a "%2.2d" style value to the
    # same two helpers - confirm which form they expect.
    set FNum "${N}.${N}d"
    set FormatPrefixe "$Prefixe%${N}.${N}d"
    set Numero 0
    set Premier ""
    foreach Ligne $LesInfosSource {
        incr Numero
        if {$Premier==""} { set Premier $Numero }
        set Dernier $Numero
        regsub -all { *\t *} $Ligne "\t" Ligne
        regsub -all {;} $Ligne "" Ligne
        lassign [split $Ligne "\t"] YourId Id Ac Gn De Ec
        set Nom [format $FormatPrefixe $Numero]
        set FichierTfa  "$RepGen/prottfa/$Nom"
        set FichierEmbl "$RepGen/protembl/$Nom"
        set FichierInfo "$RepGen/infos/$Nom"
        if {$KeepExisting && [file exists $FichierEmbl] && [ContenuDuFichier $FichierEmbl]!=""} { continue }
        set LeEmbl [LaSequenceDesBanques $Id $Ac "OnVeutEmbl"]
        set Embl [join $LeEmbl "\n"]
        set TFA [SequenceFormatTFA $Embl "$Nom $Id $Ac $Gn $De $Ec" "protembl"]
        Espionne $TFA
        set LeInfo {}
        lappend LeInfo "Nom: $Nom"
        set Alias ""
        lappend LeInfo "InfoPampas: $Ligne"
        if {$Id!=""} { lappend LeInfo "ID: $Id" }
        if {$Ac!=""} { lappend LeInfo "AC: $Ac" ; set Alias $Ac }
        if {$Gn!=""} { lappend LeInfo "ValiGN: $Gn" ; set Alias $Gn }
        if {$De!=""} { lappend LeInfo "ValiDE: $De" }
        # The Ec line carries the EC value (was wrongly the description).
        if {$Ec!=""} { lappend LeInfo "Ec: $Ec" }
        set Info [join $LeInfo "\n"]
        Sauve $Embl dans $FichierEmbl
        Sauve $TFA dans $FichierTfa
        Sauve $Info dans $FichierInfo
    }
    if {$KeepExisting && [file exists "[RepertoireDuGenome]/fiches/bornesdespabs"]} { return $RepGen }
    CreeBornesDesPABsTroisGradins $Premier $Dernier $Prefixe "" "" $FNum $RepGen
    CreeFichierMiniConfig $Prefixe $FNum "Prot" "" "" "" $RepGen
    AppendAuFichier "$RepGen/beton/miniconfig" "OnTraiteLike Pampas"
    PampasProjectInfo Mail $Mail Description $Description PublicPrivate $PublicPrivate
    set FichierSizeMsf "$RepGen/fiches/sizemsf"
    if {[FileAbsent $FichierSizeMsf]} {
        Sauve "90 90 90 0.001 respectively : at_least max_with_complete_genomes max_pdb expect" dans $FichierSizeMsf
    }
    set FichierSizeDescriptif "$RepGen/fiches/sizedescriptif"
    if {[FileAbsent $FichierSizeDescriptif]} {
        Sauve "90 120 90 10 respectively : at_least max_with_complete_genomes max_pdb expect" dans $FichierSizeDescriptif
    }
    set RepBanques "$RepGen/banques"
    file mkdir $RepBanques
    NousAllonsAuBoulot $RepBanques
    if {[FileAbsent blastpdatabase]} { exec ln -s /blast/pdbsprot.pal blastpdatabase }
    OnRevientDuBoulot
    NousAllonsAuBoulot "[PampasServerDir]/assets/GscopeLink"
    exec ln -s $RepGenLink
    # Was spelled "OnrevientDuBoulot": Tcl command names are case-sensitive,
    # so the working directory was never restored.
    OnRevientDuBoulot
    exec chmod -R g+w $RepGen
    QuestionDeScience $ProjName CompletePampasProject
    return $RepGen
}

# Reads or updates the key/value store fiches/PampasProjectInfo.txt.
#   no argument        returns the raw file content
#   key value ...      stores the pairs ('#' and ';' in values become blanks)
#                      and returns the result of Sauve
#   all                returns all keys and values joined with "#"
#   key                returns its value, or "-" if unknown
proc PampasProjectInfo {args} {
    set PampasProjectInfoFile [Fiches "PampasProjectInfo.txt"]
    if {$args=={}} { return [ContenuDuFichier $PampasProjectInfoFile] }
    #rR the existing content is loaded first
    if {[FileAbsent $PampasProjectInfoFile]} {
        set LesKV {}
    } else {
        set LesKV [ContenuDuFichier $PampasProjectInfoFile]
    }
    array set PPI $LesKV
    if {[llength $args]>1} {
        foreach {K V} $args {
            regsub -all {[\#;]} $V " " V
            set PPI($K) $V
        }
        return [Sauve [array get PPI] dans $PampasProjectInfoFile]
    }
    if {$args=="all"} { return [join [array get PPI] "#"] }
    if {[info exists PPI($args)]} { return $PPI($args) }
    return "-"
}

# Runs all the computation steps of a Pampas project built from accessions.
proc CompletePampasProject {} {
    #rR Note: since 2020/05 a pampas can be created from a set of alignments,
    #rR see CreatePampasMsfFromExistingTfaOrMsf above.
    #rR Here we start from a list of accessions instead.
    #rR This procedure can be executed in CafeDesSciences.
    CreatePampasDb
    BlastPPourTous
    Imagette3dDuPdbBlastPourTous
    DbClustalPourTous
    LeonEtMacsimPourTous
    SvgWithFeaturesPourTous
}

# Downloads the RCSB picture of a PDB entry and converts it into a 64 pixel
# wide png with a transparent background.
#   Access  PDB access, optionally prefixed "PDB:" or "PDB_" and suffixed "_x"
#   Rep     output directory (default ".")
#   Nom     base name of the png (default: Access as given)
# Returns the png file name, or "" if Access is not a PDB code or no JPEG
# could be fetched.
proc Imagette3dDuPdb {Access {Rep ""} {Nom ""}} {
    if {$Rep==""} { set Rep "." }
    if {$Nom==""} { set Nom $Access }
    regsub -nocase {^PDB(:|_)} $Access "" Access
    regsub {_.*} $Access "" Access
    set Access [string tolower $Access]
    if { ! [regexp {[1-9]([0-9a-z][0-9a-z])[0-9a-z]} $Access Match Milieu]} {
        # Was "continue", which raises an error outside of a loop.
        return ""
    }
    set Url "https://cdn.rcsb.org/images/rutgers/$Milieu/$Access/$Access.pdb-500.jpg"
    Espionne $Url
    set FichierJpg "$Rep/${Nom}_$Access.jpg"
    set FileType ""
    # wget writes to stderr even on success, hence the catch.
    catch {exec wget -O $FichierJpg $Url}
    catch {set FileType [exec file $FichierJpg]}
    Espionne $FileType
    if {[regexp "JPEG image" $FileType]} {
        set FichierPng "$Rep/$Nom.png"
        exec convert $FichierJpg -transparent white $FichierPng
        exec convert $FichierPng -resize 64 $FichierPng
        return $FichierPng
    }
    return ""
}

# Creates the missing s3d/<Nom>.png of every PAB. Returns the files created.
proc Imagette3dDuPdbBlastPourTous {} {
    set LesImagettes {}
    foreach Nom [ListeDesPABs] {
        if {[file exists "[RepertoireDuGenome]/s3d/$Nom.png"]} { continue }
        Espionne $Nom
        set FichierImagette [Imagette3dDuPdbBlast $Nom]
        Espionne $FichierImagette
        if {$FichierImagette!=""} { lappend LesImagettes $FichierImagette }
    }
    return $LesImagettes
}

# Creates the picture of Nom from the first PDB hit of its blastp for which a
# picture can be fetched. Returns the png file name or "".
proc Imagette3dDuPdbBlast {Nom} {
    set FichierBlast [GscopeFile $Nom "blastp"]
    DecortiqueBlast $FichierBlast "" "" Query lBanqueId
    set Rep "[RepertoireDuGenome]/s3d"
    file mkdir $Rep
    foreach Access $lBanqueId {
        if { ! [EstUnAccessPDB $Access]} { continue }
        set FichierPng [Imagette3dDuPdb $Access $Rep $Nom]
        if {$FichierPng!=""} { return $FichierPng }
    }
    return ""
}

# Returns one line "access PN DE" for each PDB hit of the blastp of Nom.
proc RelatedPDBs {Nom} {
    set FichierBlast [GscopeFile $Nom "blastp"]
    DecortiqueBlast $FichierBlast "" "" Query lBanqueId "" lDE lProfil lPN
    set LesPDBs {}
    foreach Access $lBanqueId DE $lDE PN $lPN {
        if { ! [EstUnAccessPDB $Access]} { continue }
        regsub -nocase {^PDB(:|_)} $Access "" Access
        lappend LesPDBs [format "%-8s %6s %s" $Access $PN $DE]
    }
    return [join $LesPDBs "\n"]
}