#rR Beginning of gscope_daedalus.tcl

# ChampDaedalus --
#   Returns the value of field Champ for the hit Access of the gene Nom.
#   The hit text is obtained through DaedalusHit and the field is then
#   extracted with ValeurDeLaBalise (called with "NePasRogner").
#   Access and Nom are remembered in globals: passing an empty value reuses
#   the one given on the previous call.
#   Returns "" when no hit is known for Access.
proc ChampDaedalus {Champ {Access ""} {Nom ""}} {
    global ChampDaedalusAccessCourant ChampDaedalusNomCourant

    if {$Access==""} {
        set Access $ChampDaedalusAccessCourant
    } else {
        set ChampDaedalusAccessCourant $Access
    }
    if {$Nom ==""} {
        set Nom $ChampDaedalusNomCourant
    } else {
        set ChampDaedalusNomCourant $Nom
    }

    set Hit [DaedalusHit $Access $Nom]
    if {$Hit==""} { return "" }
    set Valeur [ValeurDeLaBalise $Champ Hit "NePasRogner"]
    return $Valeur
}

# DaedalusHit --
#   Returns the stored hit text for Access within the daedalushits file of Nom,
#   or "" when the file or the access is unknown.
#   The global array DaedalusHit is used as a cache holding ONE Nom at a time:
#   loading another Nom discards everything cached before.
#   Each hit is indexed by the access read on the line, by every word of its
#   AC field, and by the upper-case form of each of those keys.
#   An empty Nom reuses the Nom of the previous call.
proc DaedalusHit {Access {Nom ""}} {
    global DaedalusHit DaedalusNomCourant

    if {$Nom==""} {
        set Nom $DaedalusNomCourant
    } else {
        set DaedalusNomCourant $Nom
    }

    if {[info exists DaedalusHit($Nom,$Access)]} { return [set DaedalusHit($Nom,$Access)] }
    # The file of Nom was already loaded and Access is not in it.
    if {[info exists DaedalusHit($Nom,"EstCharge")]} { return "" }

    if {[info exists DaedalusHit]} { unset DaedalusHit }
    set FichierDaedalus "[RepertoireDuGenome]/daedalushits/$Nom"
    # Mark as loaded before reading so the final recursive call terminates.
    set DaedalusHit($Nom,"EstCharge") 1
    if { ! [file exists $FichierDaedalus]} { return "" }

    foreach Ligne [LesLignesDuFichier $FichierDaedalus] {
        if { ! [regexp -nocase {[a-z]} $Ligne]} { continue }
        # A usable line starts with two words; otherwise AccessLu would be
        # unset or left over from the previous line.
        if {[scan $Ligne "%s %s" BH AccessLu] < 2} { continue }
        # NOTE(review): BH and AccessLu are used as a regular expression here;
        # an access containing regexp metacharacters would not be stripped as
        # intended - confirm accesses never contain such characters.
        regsub " *$BH +$AccessLu" $Ligne "" Hit
        regsub {>>$} $Hit ">" Hit
        set LesACs [ValeurDeLaBalise AC Hit "NePasRogner"]
        # The entry under the access as read keeps the untrimmed text,
        # all other keys receive the trimmed one.
        set DaedalusHit($Nom,$AccessLu) $Hit
        set Hit [string trim $Hit]
        set DaedalusHit($Nom,[string toupper $AccessLu]) $Hit
        foreach AC [split $LesACs " "] {
            set DaedalusHit($Nom,$AC) $Hit
            set DaedalusHit($Nom,[string toupper $AC]) $Hit
        }
    }
    return [DaedalusHit $Access $Nom]
}

# TTT --
#   Debugging helper: runs getz on one hard-coded VARSPLIC access, shows the
#   output with Espionne and exits the interpreter in every case.
proc TTT {} {
    set A "Q92947-2"
    if {[catch {set Sortie [exec getz -e -cs "\\\|" \[VARSPLIC:$A\]>protein]} Message]} {
        FaireLire "error from getz \n$Message"
        exit
    }
    Espionne $Sortie
    exit
}

# InformeAvecDaedalusHitPourTous --
#   For each gene of Qui (default: ListeDesPABs) reads its daedalushits file,
#   selects one BlastHit and records information with InformeSansDemander.
#   Quoi is matched against the substrings OS, OC, GN and DE to decide what is
#   recorded (default "OS_OC_GN_DE"):
#     OS  "=Organism:" line, also appended to fiches/organismesdespabs;
#         genes already listed in that file are skipped entirely
#     OC  "=Taxonomy:" line (only evaluated inside the OS processing)
#     GN  "ValiGN:" line
#     DE  "ValiDE:" line
#   Returns the organism file name when OS was requested, otherwise "".
proc InformeAvecDaedalusHitPourTous {{Quoi ""} {Qui ""}} {
    if {$Quoi==""} { set Quoi "OS_OC_GN_DE" }

    if {[regexp "OS" $Quoi]} {
        set FichierOrganismesDesPABs "[RepertoireDuGenome]/fiches/organismesdespabs"
        if {[file exists $FichierOrganismesDesPABs]} {
            foreach Ligne [LesLignesDuFichier $FichierOrganismesDesPABs] {
                # Skip blank lines instead of reusing a stale or unset name.
                if {[scan $Ligne "%s" NomLu] < 1} { continue }
                set DejaVu($NomLu,OS) 1
            }
        }
    }

    if {$Qui==""} { set Qui [ListeDesPABs] }
    if {[EstUnPAB $Qui]} { set Qui [list $Qui] }

    foreach Nom $Qui {
        if {[EstUnPABMute $Nom]} { continue }
        Espionne "I'll examine $Nom"
        if {[regexp "OS" $Quoi] && [info exists DejaVu($Nom,OS)]} { continue }

        set Narcisse [Narcisse $Nom]
        if {[regexp "OS" $Quoi] && [regexp "^cio" $Narcisse]} {
            # Narcisse values starting with cio get a fixed organism value.
            # NOTE(review): the text between the angle brackets is empty here
            # whereas the general case below writes names inside them - confirm
            # that nothing was lost from this literal.
            set Valeur "7719 <>"
            set Ligne "$Nom $Valeur"
            InformeSansDemander $Nom "=Organism: $Valeur"
            AppendAuFichier $FichierOrganismesDesPABs $Ligne
            if {[regexp "OC" $Quoi]} {
                scan $Valeur "%s" TaxId
                set Classe [TaxClass $TaxId "" "" Name]
                if {$Classe!=""} {
                    Espionne "$Nom Taxonomy: $Classe"
                    InformeSansDemander $Nom "=Taxonomy: $Classe"
                }
            }
            continue
        }

        set FHit "[RepertoireDuGenome]/daedalushits/$Nom"
        if { ! [file exists $FHit]} { continue }
        set Texte [ContenuDuFichier $FHit]

        # Reset the per-gene results. GN and DE must be reset together with OX:
        # when the file holds no BlastHit they would otherwise be undefined or
        # still hold the values of the previous gene.
        set OX ""
        set GN ""
        set DE ""
        if {[info exists ExistePlusLoin]} { unset ExistePlusLoin }

        while {[set BH [ValeurDeLaBalise "BlastHit" Texte]]!=""} {
            set AC [ValeurDeLaBalise AC BH NePasRogner]
            set ID [ValeurDeLaBalise ID BH NePasRogner]
            set OX [ValeurDeLaBalise OX BH NePasRogner]
            set GN [ValeurDeLaBalise GN BH NePasRogner]
            if {[regexp "Name=" $GN]} {
                # Store the substitution back into GN; without a target
                # variable regsub only returns the new string and GN would be
                # left unchanged.
                regsub -all ";" $GN " " GN
                set GN [StringApres "Name=" dans $GN]
            }
            set DE [ValeurDeLaBalise DE BH NePasRogner]

            # Accept this hit when Narcisse is a PDB access or when the hit is
            # Narcisse itself (same ID, or Narcisse found in the AC field).
            if {[EstUnAccessPDB $Narcisse] ||
                [string equal -nocase $ID $Narcisse] || [regexp -nocase $Narcisse $AC]} { break }

            # Otherwise keep looking only if Narcisse occurs in the text that
            # was still unread when this test was first made.
            if { ! [info exists ExistePlusLoin]} { set ExistePlusLoin [regexp -nocase $Narcisse $Texte] }
            if {$ExistePlusLoin} { continue }
            # Interactive confirmation, disabled by the leading 0.
            if { 0 && [OuiOuNon "For $Nom ID=$ID AC=$AC the hit \n$BH\nis not Narcisse $Narcisse\nDo I continue parsing\n\ [string range $Texte 0 500] ?"]} { continue }
            break
        }

        if {[regexp "OS" $Quoi] && $OX!=""} {
            set LesNames {}
            foreach TaxId [split $OX " "] {
                lappend LesNames "<[Tax $TaxId]>"
            }
            set Valeur "$OX <[join $LesNames "+"]>"
            set Ligne "$Nom $Valeur"
            AppendAuFichier $FichierOrganismesDesPABs $Ligne
            InformeSansDemander $Nom "=Organism: $Valeur"
            if {[regexp "OC" $Quoi]} {
                scan $OX "%s" TaxId
                set Classe [TaxClass $TaxId "" "" Name]
                if {$Classe!=""} {
                    Espionne "$Nom Taxonomy: $Classe"
                    InformeSansDemander $Nom "=Taxonomy: $Classe"
                }
            }
        }
        if {[regexp "GN" $Quoi] && $GN!=""} { InformeSansDemander $Nom "ValiGN: $GN" }
        if {[regexp "DE" $Quoi] && $DE!=""} { InformeSansDemander $Nom "ValiDE: $DE" }
    }

    if {[regexp "OS" $Quoi]} { return $FichierOrganismesDesPABs }
    return
}

# DaedalusHitsDuBlastPPourTous --
#   Creates the daedalushits file of every gene of ListeDesPABs, optionally
#   starting just AFTER the gene CommenceIci.
#   When files already exist the user is asked whether to keep them; kept
#   files are not recomputed.
#   With EnProcessExterne set to "EnProcessExterne" the work is chained through
#   external gscope processes: once more than 10 files were created, a new
#   process is launched to continue after the current gene and this one exits.
#   Returns the list of the non-empty results of DaedalusHits.
proc DaedalusHitsDuBlastPPourTous {{EnProcessExterne ""} {CommenceIci ""}} {
    set Liste [ListeDesPABs]
    if {$CommenceIci!=""} {
        if {[set Trouve [lsearch -exact $Liste $CommenceIci]]>=0} {
            set Liste [lrange $Liste [incr Trouve] end]
        }
    }

    if { $EnProcessExterne == "EnProcessExterne" } {
        set MaxDansMemeProcess 10
    } else {
        set MaxDansMemeProcess 999999
    }

    set RepDaedalusHits "[RepertoireDuGenome]/daedalushits"
    if { ! [file exists $RepDaedalusHits]} { file mkdir $RepDaedalusHits }

    set Keep 1
    if {[glob -nocomplain "$RepDaedalusHits/*"]!={}} {
        set Keep [OuiOuNon "Some files exist in $RepDaedalusHits/\nDo I keep all of them ?"]
    }

    set LesNouveaux {}
    set DejaFaitsDansMemeProcess 0
    foreach Nom $Liste {
        if {[Fantome $Nom]} { continue }
        set FichierDaedalusHits "$RepDaedalusHits/$Nom"
        if {$Keep && [file exists $FichierDaedalusHits]} { continue }
        # PasTouche and FinPasTouche bracket the work on one gene.
        if {[PasTouche $Nom]} { continue }
        Espionne "DaedalusHitsDuBlastPPourTous $Nom"
        AppendAuFichier "[RepertoireDuGenome]/daedalushits.log" "[pid] $FichierDaedalusHits [Date Nice]"
        set Dae [DaedalusHits "DuBlastP" $Nom $FichierDaedalusHits]
        FinPasTouche $Nom
        if {$Dae==""} { continue }
        lappend LesNouveaux $Dae
        if {[incr DejaFaitsDansMemeProcess] > $MaxDansMemeProcess} {
            exec gscope yes DaedalusHitsDuBlastPPourTous "EnProcessExterne" $Nom &
            exit
        }
    }
    return $LesNouveaux
}

# DaedalusHits --
#   Builds the list of hit lines for one query and saves it.
#   Source selects the meaning of NomOuListeOuTexte:
#     DuRepDaedalus  directory of an already built Daedalus index
#     DuBlastP       gene name, the flat text comes from its blastp output
#     DeLaListe      list, or file, of accesses
#     DuFlatText     the flat text itself
#   FichierDaedalusHits is the file to write (default: daedalushits/Nom in the
#   genome directory); the special value "RetourneLesHits" returns the list of
#   hit lines without writing it.
#   KeepDaedalus: if it contains "KeepDaedalus" the index directory is kept and
#   the result is the list of the written file and that directory.
#   Returns an empty list when no flat text could be obtained.
proc DaedalusHits {Source NomOuListeOuTexte {FichierDaedalusHits ""} {KeepDaedalus ""}} {
    Wup "Source can be DuRepDaedalus DuBlastP DeLaListe DuFlatText"
    Wup "if KeepDadalus it returns the list FichierDaedalus RepDaedalus"

    set KeepDaedalus [regexp -nocase "KeepDaedalus" $KeepDaedalus]

    set Nom "NoName"
    if {$Source=="DuRepDaedalus"} {
        set RepDaedalus $NomOuListeOuTexte
    } else {
        # Default so that an unrecognised Source returns an empty result
        # instead of failing on an unset variable.
        set FlatTexte ""
        if {$Source=="DuFlatText"} { set FlatTexte $NomOuListeOuTexte }
        if {$Source=="DuBlastP"} {
            set Nom $NomOuListeOuTexte
            set FlatTexte [DaedalusFlatFileDuBlastP $Nom "GetText"]
        }
        if {$Source=="DeLaListe"} {
            set FichierOuListe $NomOuListeOuTexte
            set FlatTexte [DaedalusFlatFileDeLaListe $FichierOuListe "GetText"]
        }
        if {$FlatTexte==""} { return {} }
        set RepDaedalus [DaedalusSrsbuild $FlatTexte]
    }

    if {$FichierDaedalusHits==""} {
        set RepDaedalusHits "[RepertoireDuGenome]/daedalushits"
        if { ! [file exists $RepDaedalusHits]} { file mkdir $RepDaedalusHits }
        set FichierDaedalusHits "$RepDaedalusHits/$Nom"
    }

    set LesBeauxHits {}

    # set Methode "DaedalusProtView"
    set Methode "ExtractionDeProtein"

    if {$Methode=="ExtractionDeProtein"} {
        # First pass: one line per access with its Expect value.
        set ExpDaedalus [DaedalusGetz $RepDaedalus "GetExpects"]
        if {[info exists ExpectDe]} { unset ExpectDe }
        set LesACCESS {}
        set LesPDBs {}
        set LesGscopes {}
        foreach Ligne [split $ExpDaedalus "\n"] {
            # Default used when the line carries no second word.
            set Expect 0.00099999
            # Skip blank lines, for instance when getz failed and returned
            # nothing; Access would otherwise be unset or stale.
            if {[scan $Ligne "%s %s" Access Expect] < 1} { continue }
            set Access [StringApres "DAEDALUS:Flat_" dans $Access]
            set ACCESS [string toupper $Access]
            # Remember the access without its trailing _N or -N suffix.
            if {[regexp {_[0-9]+$} $Access]} {
                regsub {_[0-9]+$} $Access "" ProtAccess
                set Varsplic($ACCESS) $ProtAccess
            }
            if {[regexp {\-[0-9]+$} $Access]} {
                regsub {\-[0-9]+$} $Access "" ProtAccess
                set Varsplic($ACCESS) $ProtAccess
            }
            if {[EstUnAccessPDB $ACCESS]} {
                set EMBL [EMBLduPDB $ACCESS "ACFirst"]
                if {$EMBL!=""} { lappend LesPDBs $EMBL }
            }
            if {[EstUnAccessDUneBanqueBlastPDeGscope $ACCESS]} {
                set EMBL [EMBLdeGscope $ACCESS "ACFirst"]
                if {$EMBL!=""} { lappend LesGscopes $EMBL }
                if {[regexp {[\:_]} $ACCESS Car]} {
                    set ACCESS [StringApres $Car dans $ACCESS]
                }
            }
            lappend LesACCESS $ACCESS
            set ExpectDe($ACCESS) $Expect
        }

        # Second pass: the descriptive fields, saved to a temporary file that
        # is removed at the end of this proc.
        set OutDaedalus [DaedalusGetz $RepDaedalus "GetInfos"]
        set FichierOutDaedalus "[RepertoireDuGenome]/daedalushits/$Nom.daedalus_output"
        Sauve $OutDaedalus dans $FichierOutDaedalus

        if { ! $KeepDaedalus } { file delete -force $RepDaedalus}

        set SansV 1
        set LesLignes [split $OutDaedalus "\n"]
        LConcat LesLignes $LesPDBs
        LConcat LesLignes $LesGscopes
        # Sentinel AC line: it flushes the last entry and stops the loop.
        lappend LesLignes "AC Thats all folks"
        # Re-split because the appended elements may hold several lines each.
        set LesLignes [split [join $LesLignes "\n"] "\n"]
        set ClefPrecedente ""
        foreach Ligne $LesLignes {
            if { ! [regexp -nocase {[a-z]} $Ligne]} { continue }
            if { [regexp {^ } $Ligne]} { continue }
            if {$SansV} {
                # Keyed lines: the first word is the key, the rest its value.
                scan $Ligne "%s" Clef
                regsub "$Clef " $Ligne "" Ligne
                if {$Clef=="AC" && $ClefPrecedente=="AC"} {
                    # Continuation of the AC list of the current entry.
                    set LesNouveauxACs [LesMotsDeLaLigne $Ligne]
                    set LesACs [concat $LesACs $LesNouveauxACs]
                    continue
                }
                set ClefPrecedente $Clef
                if {$Clef=="AC"} {
                    # A new entry starts: first store the one collected so far.
                    if {[info exists PremierAC]} {
                        if {[info exists ExpectDe($PremierAC)]} {
                            set Expect [set ExpectDe($PremierAC)]
                        } else {
                            set Expect 0.000999
                        }
                        set PREMIERAC [string toupper $PremierAC]
                        # Derive the missing one of OX and OS from the other.
                        if {$LesOXs=={} && $LesOSs!={}} {
                            set PremTax [Tax [lindex $LesOSs 0]]
                            if { ! [regexp {^[0-9]+$} $PremTax]} { set PremTax -1 }
                            set LesOXs [list $PremTax]
                        }
                        if {$LesOSs=={} && $LesOXs!={}} {
                            set PremTax [lindex $LesOXs 0]
                            if { ! [regexp {^[0-9]+$} $PremTax]} {
                                set PremOS "Inconnu inconnu"
                            } else {
                                set PremOS [Tax $PremTax]
                            }
                            set LesOSs [list $PremOS]
                        }
                        # NOTE(review): this literal holds only escaped spaces
                        # and uses none of the fields collected above; it looks
                        # as if its content was lost - confirm the intended
                        # format of a hit line before relying on the output.
                        set SesBeauxHits($PREMIERAC) "\ \ \ \ \ \ \ \ "
                        unset PremierAC
                    }
                    if {[regexp "Thats all folks" $Ligne]} { break }
                    set ID "No_ID"
                    set DE "No_DE" ; set LesDEs {}
                    set OS "No_OS" ; set LesOSs {}
                    set OX "No_OX" ; set LesOXs {}
                    set GN "No_GN" ; set LesGNs {}
                    set LesACs [LesMotsDeLaLigne $Ligne]
                    set PremierAC [lindex $LesACs 0]
                    continue
                }
                if {$Clef=="ID"} {
                    scan $Ligne "%s" ID
                    continue
                }
                if {$Clef=="OX"} {
                    # Keep only the numeric identifiers.
                    regsub -nocase "NCBI_TaxID=" $Ligne "" Ligne
                    regsub -all {[^0-9 ]} $Ligne "" Ligne
                    set LesOXs [LesMotsDeLaLigne $Ligne]
                    set OX [lindex $LesOXs 0]
                    continue
                }
                if {$Clef=="GN"} {
                    regsub {\.$} $Ligne "" Ligne
                    lappend LesGNs $Ligne
                    set GN [lindex $LesGNs 0]
                    continue
                }
                if {$Clef=="OS"} {
                    regsub {\.$} $Ligne "" Ligne
                    lappend LesOSs $Ligne
                    continue
                }
                if {$Clef=="DE"} {
                    lappend LesDEs $Ligne
                    continue
                }
            } else {
                # Column output separated by a vertical bar. Not reachable
                # while SansV is set to 1 above.
                set LesChamps [split $Ligne "|"]
                set Description ""
                ScanLaListe $LesChamps Hit ID Acc NCBI_TaxId Organism Description
                set BonneDescription [join [lrange $LesChamps 5 end] " "]
                if {$Description!=$BonneDescription} { FaireLire "CS character in following line\n$Ligne" }
                lappend LesBeauxHits "$Acc $NCBI_TaxId $Description $Organism"
            }
        }

        # Emit the hits in the order of the Expect listing. An access without
        # an entry of its own reuses the entry of its suffix-less access.
        foreach ACCESS $LesACCESS {
            if { ! [info exists SesBeauxHits($ACCESS)]} {
                if { ! [info exists Varsplic($ACCESS)]} { continue }
                if { ! [info exists SesBeauxHits([set Varsplic($ACCESS)])]} { continue }
                set Hit [set SesBeauxHits([set Varsplic($ACCESS)])]
                # NOTE(review): these three identical patterns look truncated;
                # as written each removes the first run of closing square
                # brackets followed by a greater-than sign - confirm the
                # intended patterns.
                regsub -nocase {]+>} $Hit "" Hit
                regsub -nocase {]+>} $Hit "" Hit
                regsub -nocase {]+>} $Hit "" Hit
                set SesBeauxHits($ACCESS) $Hit
            }
            lappend LesBeauxHits [set SesBeauxHits($ACCESS)]
        }
    }

    if {$Methode=="DaedalusProtView"} {
        Wup "Attention ne traite pas les PDB pour le moment"
        set ViewDaedalus [DaedalusGetz $RepDaedalus "DaedalusProtView"]
        # NOTE(review): split treats its second argument as a set of
        # characters, so this cuts at every single slash - confirm intent.
        foreach Ligne [split $ViewDaedalus "//"] {
            set LesChamps [split $Ligne "|"]
            ScanLaListe $LesChamps ID OC OS TaxId DE Expect
            regsub -all {[^0-9]} $TaxId " " TaxId
            set OX [join [LesMotsDeLaLigne $TaxId] " "]
            regsub -all "\n" $DE " " DE
            # NOTE(review): literal of escaped spaces only, same concern as
            # for the hit line built in the other method.
            lappend LesBeauxHits "\ \ \ \ \ \ "
        }
    }

    if {$FichierDaedalusHits=="RetourneLesHits"} { return $LesBeauxHits }

    set FDH [SauveLesLignes $LesBeauxHits dans $FichierDaedalusHits]
    # The temporary output file only exists for the ExtractionDeProtein method.
    if {[info exists FichierOutDaedalus] && [file exists $FichierOutDaedalus]} {
        file delete -force $FichierOutDaedalus
    }
    if {$KeepDaedalus} { return [list $FDH $RepDaedalus] }
    return $FDH
}

# DaedalusGetz --
#   Runs getz from within the directory Repertoire and returns its output.
#   Commande selects the query: GetInfos (default), GetExpects or
#   DaedalusProtView. Any other value runs nothing and returns "".
#   A getz failure is reported with FaireLire and "" is returned.
proc DaedalusGetz {Repertoire {Commande ""}} {
    if {$Commande==""} { set Commande "GetInfos" }

    NousAllonsAuBoulot $Repertoire
    set Sortie ""
    if {$Commande=="GetInfos"} {
        if {[catch {set Sortie [exec getz -f "AccNumber ID NCBI_TaxId GeneName Organism Description" -cs "\\\|" \[daedalus-id:*\]>protein]} Message]} {
            FaireLire "error from getz \n$Message"
        }
    }
    if {$Commande=="GetExpects"} {
        if {[catch {set Sortie [exec getz -vf "Expect" \[daedalus:*\]]} Message]} {
            FaireLire "error from getz \n$Message"
        }
    }
    if {$Commande=="DaedalusProtView"} {
        if {[catch {set Sortie [exec getz -view DaedalusProtView \[daedalus:*\]]} Message]} {
            FaireLire "error from getz \n$Message"
        }
    }
    OnRevientDuBoulot
    return $Sortie
}

# DaedalusSrsbuild --
#   Writes a Daedalus flat file into a working directory and runs srsbuild
#   on it (first with -w, then with -l).
#   FlatFileOuTexte is taken as the flat text itself when it contains a
#   newline, otherwise as the name of a file holding it.
#   Nom is the flat file name (default "Flat", or the tail of the given file);
#   the extension .daedalus is appended when missing.
#   RepTmp is the working directory (default: a new one from TmpFile).
#   Returns the working directory.
proc DaedalusSrsbuild {FlatFileOuTexte {Nom ""} {RepTmp ""}} {
    global DejaVuSrsError

    if {[regexp "\n" $FlatFileOuTexte]} {
        set FlatTexte $FlatFileOuTexte
        if {$Nom==""} { set Nom "Flat" }
    } else {
        set FlatTexte [ContenuDuFichier $FlatFileOuTexte]
        if {$Nom==""} { set Nom [file tail $FlatFileOuTexte] }
    }
    # The dot is escaped so that only a real .daedalus extension counts.
    if { ! [regexp {\.daedalus$} $Nom]} { append Nom ".daedalus" }

    if {$RepTmp==""} { set RepTmp [TmpFile Daedalus] }
    if { ! [file exists $RepTmp]} { file mkdir $RepTmp }

    NousAllonsAuBoulot $RepTmp
    # Ask again until the user accepts to overwrite a different existing file.
    while {[file exists $Nom] && [ContenuDuFichier $Nom]!=$FlatTexte} {
        if {[OuiOuNon "[pwd]/$Nom already exists\nCan I replace it with the new Daedalus flatfile ?"]} { break }
    }
    Sauve $FlatTexte dans $Nom

    # An srsbuild failure is not fatal and is reported once per process only.
    foreach Option {-w -l} {
        if {[catch {exec srsbuild $Option daedalus} Message]} {
            if { ! [info exists DejaVuSrsError]} {
                FaireLire "error from srsbuild BUT I'll continue ... may be it isn't important ...\n$Message"
                set DejaVuSrsError 1
            }
        }
    }
    OnRevientDuBoulot
    return $RepTmp
}

# DaedalusFlatFileDuBlastP --
#   Converts the hits of a blastp output into Daedalus flat file entries
#   (lines ID, BL, DR, S0 and a closing // per hit).
#   NomOuFichier is a gene name (its file is then blastp/Nom in the genome
#   directory) or the path of a blastp file.
#   Sortie is "GetText" to return the text, or the name of the file to create
#   (default: a temporary .daedalus file); the file name is then returned.
#   SeuilExpect and MaxListe default to values from TailleDuDescriptifDuBlastP.
#   Returns "" when the blastp file does not exist.
proc DaedalusFlatFileDuBlastP {NomOuFichier {Sortie ""} {SeuilExpect ""} {MaxListe ""}} {
    Wup "Sortie can be GetText (to return the text) or the name of the file to create"
    Wup "if Sortie is empty see below"

    set NomPossible [file tail $NomOuFichier]
    if {$Sortie==""} { set Sortie "[TmpFile $NomPossible .].daedalus" }

    if {$SeuilExpect=="" || $MaxListe==""} {
        scan [TailleDuDescriptifDuBlastP] "%d %d %d %f" MinObliges MaxVoulus MaxPDB SeuilExpectVoulu
    }
    if {$SeuilExpect==""} { set SeuilExpect $SeuilExpectVoulu}
    if {$MaxListe==""} { set MaxListe $MaxVoulus }

    if { ! [regexp "/" $NomOuFichier] && [EstUnPAB $NomOuFichier]} {
        set Nom $NomOuFichier
        set Fichier "[RepertoireDuGenome]/blastp/$Nom"
    } else {
        set Nom [file tail $NomOuFichier]
        set Fichier $NomOuFichier
    }
    if { ! [file exists $Fichier]} { return "" }

    global ScoreDansProfil ; set ScoreDansProfil 1
    set n [DecortiqueBlast $Fichier $SeuilExpect $MaxListe Query lBanqueId lAccess lDE lScore lPN]

    set Daedale {}
    set NomQuery [file tail $Query]
    foreach BanqueId $lBanqueId Access $lAccess DE $lDE Score $lScore PN $lPN {
        if {[EstUnAccessDUneBanqueBlastPDeGscope $BanqueId]} { set Access $BanqueId }
        # Drop the parentheses of an access fully enclosed in parentheses.
        if {[regexp {^\(.+\)$} $Access]} { regsub -all {[\(\)]} $Access "" Access }
        if {[regexp -nocase {VARSPLIC\:} $BanqueId]} { set Access [StringApres ":" dans $BanqueId] }

        lappend Daedale "ID $Access"
        lappend Daedale "BL Probe=$NomQuery ;Score=$Score ;Expect=$PN ;"
        if {[EstUnAccessPDB $BanqueId]} {
            regsub -nocase {_[A-Z]*$} $BanqueId "" BanqueIdSansChaine
            lappend Daedale "DR pdb;$BanqueIdSansChaine;."
            lappend Daedale "S0 $BanqueId pdb"
        } elseif {[regexp -nocase {VARSPLIC\:} $BanqueId]} {
            regsub "VARSPLIC" $BanqueId "PROT" ProtId
            # Start from the access itself so that an id without any _N or -N
            # suffix does not reuse the value of a previous hit or fail on an
            # unset variable.
            set ProtAccess $Access
            if {[regexp {_[0-9]+} $ProtId]} {
                regsub {_[0-9]+} $ProtId "" ProtId
                regsub {_[0-9]+} $Access "" ProtAccess
            }
            if {[regexp {\-[0-9]+} $ProtId]} {
                regsub {\-[0-9]+} $ProtId "" ProtId
                regsub {\-[0-9]+} $Access "" ProtAccess
            }
            lappend Daedale "DR prot;$ProtAccess;."
            lappend Daedale "S0 $ProtId $ProtAccess"
        } elseif {[regexp -nocase {SW\:|SPT\:|SPTNEW\:} $BanqueId]} {
            lappend Daedale "DR prot;$Access;."
            lappend Daedale "S0 $BanqueId $Access"
        } else {
            lappend Daedale "DR xxx;$Access;."
            lappend Daedale "S0 $BanqueId $Access"
        }
        lappend Daedale "//"
    }

    if {$Sortie=="GetText"} { return [join $Daedale "\n"] }
    return [SauveLesLignes $Daedale dans $Sortie]
}

# DaedalusFlatFileDeLaListe --
#   Builds a minimal Daedalus flat file (lines ID, DR and a closing // per
#   entry) from a list of accesses; only the first word of each element is
#   used.
#   ListeOuFichier is the list itself, or, when it has exactly one element,
#   the name of a file whose lines are the list.
#   Sortie is "GetText" to return the text, or the name of the file to create
#   (default: a temporary .daedalus file named after NomPossible); the file
#   name is then returned.
proc DaedalusFlatFileDeLaListe {ListeOuFichier {Sortie ""} {NomPossible ""}} {
    Wup "Sortie can be GetText (to return the text) or the name of the file to create"
    Wup "if Sortie is empty see below"

    if {$NomPossible==""} { set NomPossible "FlatFile" }
    if {[llength $ListeOuFichier]==1} {
        set Liste [LesLignesDuFichier $ListeOuFichier]
        if {$NomPossible=="FlatFile"} { set NomPossible [file tail $ListeOuFichier] }
    } else {
        set Liste $ListeOuFichier
    }
    if {$Sortie==""} { set Sortie "[TmpFile $NomPossible .].daedalus" }

    set Daedale {}
    foreach Ligne $Liste {
        # Skip blank elements; Access would otherwise be unset or repeat the
        # previous entry.
        if {[scan $Ligne "%s" Access] < 1} { continue }
        lappend Daedale "ID $Access"
        lappend Daedale "DR prot;$Access;."
        lappend Daedale "//"
    }

    if {$Sortie=="GetText"} { return [join $Daedale "\n"] }
    return [SauveLesLignes $Daedale dans $Sortie]
}