Index by: file name |
procedure name |
procedure call |
annotation
gscope_ucsc.tcl
(annotations | original source)
#rR Start of gscope_ucsc.tcl
#rR Warning: UCSC counts from 0
# ConcordanceNmNuctfa --
#   Consistency report between two per-entry files. For every name returned
#   by ListeDesPABs it compares
#     - the second word of the first line of the nuctfaRefMrna file, and
#     - the word at index 4 of the first line of the LocCdsOnRefMrna file,
#   then reports "Nom A!=B" on mismatch or "Nom A" on match through Espionne.
#   A missing file yields the placeholder "NotFound" on that side.
#   Takes no argument; the result is whatever the last Espionne call leaves.
proc ConcordanceNmNuctfa {} {
    foreach Nom [ListeDesPABs] {
        # Start from the placeholder on every iteration: scan leaves its
        # target variable untouched when the header has fewer than two words,
        # which would otherwise carry the previous entry's value over (or
        # fail on an unset variable for the very first entry).
        set MyRefMrna "NotFound"
        set FiNuc [GscopeFile $Nom nuctfaRefMrna]
        if { ! [FileAbsent $FiNuc]} {
            set Entete [PremiereLigneDuFichier $FiNuc]
            scan $Entete "%s %s" N MyRefMrna
        }
        set FiLoc [GscopeFile $Nom LocCdsOnRefMrna]
        if {[FileAbsent $FiLoc]} {
            set NmLoc "NotFound"
        } else {
            set LigneLoc [PremiereLigneDuFichier $FiLoc]
            set NmLoc [lindex [LesMotsDuTexte $LigneLoc] 4]
        }
        if {$MyRefMrna!=$NmLoc} {
            Espionne "$Nom $MyRefMrna!=$NmLoc"
        } else {
            Espionne "$Nom $MyRefMrna"
        }
    }
}
# LocaliseCdsOnMyRefMrna --
#   For each entry, read its tblastn report (proteins against refMrna, from
#   the tblastn_Homo_sapiens_refMrna_hg19 directory) and write a one-line
#   summary of the first alignment of the first hit into the
#   LocCdsOnRefMrna directory.
#   Liste: names to process; empty means every name from ListeDesPABs.
#   Entries whose output file already exists, whose report is missing, or
#   whose report contains "No hits found" are skipped.
#   The summary holds: name, query start - end, hit name, subject start - end,
#   frame line, identities, percent identity, score, raw score, expect.
#   A report without the expected Score or Identities line is shown through
#   FaireLire and then terminates the program (exit), as before.
proc LocaliseCdsOnMyRefMrna {{Liste ""}} {
    #rR The tblastn of the prottfa against refMrna must have been run before
    set DirName "LocCdsOnRefMrna"
    file mkdir [RepertoireDuGenome]/$DirName
    if {$Liste==""} { set Liste [ListeDesPABs] }
    foreach Nom $Liste {
        set LocFile [GscopeFile $Nom $DirName]
        if {[file exists $LocFile]} { continue }
        set FiTBlastN [GscopeFile $Nom "tblastn_Homo_sapiens_refMrna_hg19"]
        if {[FileAbsent $FiTBlastN]} { continue }
        set f [open $FiTBlastN "r"]
        set NoHitsFound 0
        # Advance to the first hit header (line starting with ">").
        while {[gets $f Ligne]>=0} {
            if {[regexp "No hits found" $Ligne]} { set NoHitsFound 1 ; break }
            if {[string index $Ligne 0]!=">"} { continue }
            scan $Ligne %s NM
            set NM [string trim $NM ">"]
            gets $f Ligne
            set LongNM [IntegerApres "Length =" dans $Ligne]
            break
        }
        # Release the channel before skipping, otherwise one handle leaks for
        # every report without hits.
        if {$NoHitsFound} { close $f ; continue }
        gets $f Ligne
        gets $f Ligne
        if { ! [regexp {Score = +([0-9\.\+\-e]+) bits \(([0-9]+)\)\, Expect[^ ]* = ([^,]+),} $Ligne Match Score ScoreExact Expect]} { FaireLire "pas de Score dans $Ligne"; exit }
        # An expect printed as "e-100" is completed to "1.e-100"; a score in
        # exponent notation is replaced by 9999.
        if {[regexp "^e" $Expect]} { set Expect "1.$Expect"}
        if {[regexp "e" $Score]} { set Score 9999 }
        gets $f Ligne
        if { ! [regexp {Identities = ([0-9]+)/([0-9]+) \(([0-9]+)%\)} $Ligne Match IdentUp IdentDown PId]} { FaireLire "pas de Identities dans $Ligne"; exit }
        gets $f Ligne
        set Frame $Ligne
        set DebQuery ""
        set DebSbjct ""
        set FinQuery ""
        set FinSbjct ""
        # Walk through the alignment rows of this first alignment only: keep
        # the first start and the last end seen on Query and Sbjct rows.
        while {[gets $f Ligne]>=0} {
            if {[regexp {Score } $Ligne] || [regexp {^>} $Ligne] || [regexp "Database" $Ligne]} { break }
            if {[regexp {^Query: +([0-9]+)[^0-9]+([0-9]+)$} $Ligne Match DQ FQ]} {
                if {$DebQuery==""} { set DebQuery $DQ }
                set FinQuery $FQ
            }
            if {[regexp {^Sbjct: +([0-9]+)[^0-9]+([0-9]+)$} $Ligne Match DS FS]} {
                if {$DebSbjct==""} { set DebSbjct $DS }
                set FinSbjct $FS
            }
        }
        close $f
        set Message [format "%s %5d - %5d %-10s %5d - %5d %s %4d/%4d %3d%% %5d %5d %f" $Nom $DebQuery $FinQuery $NM $DebSbjct $FinSbjct $Frame $IdentUp $IdentDown $PId $Score $ScoreExact $Expect]
        Espionne $Message
        Sauve $Message dans $LocFile
    }
}
# LocaliseMyRefMrna --
#   For each entry, read its blastn report (refMrna against the human genome,
#   from the blastnHomo_sapiens_2009 directory) and write, into the LocRefMrna
#   directory, one line per retained aligned segment of the first hit.
#   Liste: names to process; empty means every name from ListeDesPABs.
#   Entries whose output file already exists, whose report is missing, or
#   whose report contains "No hits found" are skipped. A report without the
#   expected Score or Identities line is shown through FaireLire and skipped.
#   Score, identities, expect and strand are parsed once, from the first
#   alignment header; every output line of the entry repeats them.
#   A segment whose query range lies inside an already retained one is dropped.
proc LocaliseMyRefMrna {{Liste ""}} {
    #rR The blastn of the refMrna against the human genome must have been run
    set DirName "LocRefMrna"
    file mkdir [RepertoireDuGenome]/$DirName
    if {$Liste==""} { set Liste [ListeDesPABs] }
    foreach Nom $Liste {
        set LocFile [GscopeFile $Nom $DirName]
        if {[file exists $LocFile]} { continue }
        # Forget the lookup arrays filled for the previous entry.
        if {[info exists DSdeDQ]} { unset DSdeDQ }
        if {[info exists FSdeFQ]} { unset FSdeFQ }
        if {[info exists DejaVu]} { unset DejaVu }
        set FiBlastN [GscopeFile $Nom "blastnHomo_sapiens_2009"]
        if {[FileAbsent $FiBlastN]} { continue }
        Espionne $Nom
        set f [open $FiBlastN "r"]
        set NoHitsFound 0
        # Advance to the first hit header (line starting with ">").
        while {[gets $f Ligne]>=0} {
            if {[regexp "No hits found" $Ligne]} { set NoHitsFound 1 ; break }
            if {[string index $Ligne 0]!=">"} { continue }
            set Chr [string trim [string range $Ligne 1 end]]
            gets $f Ligne
            set LongChr [IntegerApres "Length =" dans $Ligne]
            break
        }
        if {$NoHitsFound} { close $f; continue }
        gets $f Ligne
        gets $f Ligne
        set LesDebQuery {}
        set LesFinQuery {}
        set LesDebSbjct {}
        set LesFinSbjct {}
        set LesDQDejaVu {}
        set LesFQDejaVu {}
        set Skip 0
        while 1 {
            if { ! [regexp {Score = +([0-9\.\+\-e]+) bits \(([0-9]+)\)\, Expect = (.+)$} $Ligne Match Score ScoreExact Expect]} { FaireLire "$Nom pas de Score dans $Ligne"; set Skip 1; break }
            if {[regexp "^e" $Expect]} { set Expect "1.$Expect"}
            if {[regexp "e" $Score]} { set Score 9999 }
            gets $f Ligne
            # Leave the loop directly: going round again would test the same
            # line against the Score pattern and raise a second, misleading
            # "pas de Score" dialog before giving up anyway.
            if { ! [regexp {Identities = ([0-9]+)/([0-9]+) \(([0-9]+)%\)} $Ligne Match IdentUp IdentDown PId]} { FaireLire "$Nom pas de Identities dans $Ligne"; set Skip 1; break }
            gets $f Ligne
            set Strand $Ligne
            if {[regexp "Minus" $Strand]} { set Strand "R" } else { set Strand "F" }
            # One pass per aligned segment: a pass stops at the next "Score "
            # line, at the next hit, at the "Database" trailer or at end of file.
            while 1 {
                set DebQuery ""
                set DebSbjct ""
                set FinQuery ""
                set FinSbjct ""
                while {[gets $f Ligne]>=0} {
                    if {[regexp {Score } $Ligne] || [regexp {^>} $Ligne] || [regexp "Database" $Ligne]} { break }
                    if {[regexp {^Query: +([0-9]+)[^0-9]+([0-9]+)$} $Ligne Match DQ FQ]} {
                        if {$DebQuery==""} { set DebQuery $DQ }
                        set FinQuery $FQ
                    }
                    if {[regexp {^Sbjct: +([0-9]+)[^0-9]+([0-9]+)$} $Ligne Match DS FS]} {
                        if {$DebSbjct==""} { set DebSbjct $DS }
                        set FinSbjct $FS
                    }
                }
                # End of file must end the scan too: without it a report that
                # lacks the "Database" trailer would loop forever, since gets
                # keeps returning an empty line.
                set Termine [expr {[eof $f] || [regexp {^>} $Ligne] || [regexp "Database" $Ligne]}]
                # A pass that met no Query row has nothing to record.
                if {$DebQuery!=""} {
                    set Vu 0
                    foreach DQDV $LesDQDejaVu FQDV $LesFQDejaVu {
                        if {$DQDV <= $DebQuery && $FinQuery <= $FQDV} { set Vu 1; break }
                    }
                    if { ! $Vu } {
                        lappend LesDQDejaVu $DebQuery
                        lappend LesFQDejaVu $FinQuery
                        lappend LesDebQuery $DebQuery
                        lappend LesFinQuery $FinQuery
                        lappend LesDebSbjct $DebSbjct
                        lappend LesFinSbjct $FinSbjct
                    }
                }
                if {$Termine} { break }
            }
            break
        }
        close $f
        if {$Skip} { continue }
        # Sort the query starts, carrying the matching subject starts along.
        foreach DQ $LesDebQuery DS $LesDebSbjct {
            set DSdeDQ($DQ) $DS
        }
        set LesDebQuery [lsort -integer $LesDebQuery]
        set LesDebSbjct {}
        foreach DQ $LesDebQuery {
            lappend LesDebSbjct $DSdeDQ($DQ)
        }
        # Same for the query ends and their subject ends.
        foreach FQ $LesFinQuery FS $LesFinSbjct {
            set FSdeFQ($FQ) $FS
        }
        set LesFinQuery [lsort -integer $LesFinQuery]
        set LesFinSbjct {}
        foreach FQ $LesFinQuery {
            lappend LesFinSbjct $FSdeFQ($FQ)
        }
        set LeMessage {}
        foreach DQ $LesDebQuery FQ $LesFinQuery DS $LesDebSbjct FS $LesFinSbjct {
            if {[info exists DejaVu($DQ-$FQ)]} { continue }
            set DejaVu($DQ-$FQ) 1
            set Message [format "%s %5d - %5d %-5s %12d - %12d %s %4d/%4d %3d%% %5d %5d %f" $Nom $DQ $FQ $Chr $DS $FS $Strand $IdentUp $IdentDown $PId $Score $ScoreExact $Expect]
            lappend LeMessage $Message
        }
        SauveLesLignes $LeMessage dans $LocFile
    }
}
# MyRefMrna --
#   For every name from ListeDesPABs that has a tblastn report, fetch the
#   sequence of the first bank id of that report from the refMrna bank and
#   save it in the nuctfaRefMrna directory under a rewritten header:
#   ">Nom <access> <number> RefMrna of <rest of the protein header>".
#   Entries without report or without hit are skipped; a fetched text whose
#   first line is not ">word number" is shown through FaireLire and skipped.
proc MyRefMrna {} {
    set RepTbn "tblastn_Homo_sapiens_refMrna_hg19"
    set RepRefMrna "nuctfaRefMrna"
    # Shape expected for the first line of the fetched sequence.
    set MotifEntete {^>([^ ]+) ([0-9]+)\n}
    file mkdir "[RepertoireDuGenome]/$RepRefMrna"
    foreach Nom [ListeDesPABs] {
        set Rapport [GscopeFile $Nom $RepTbn]
        if {[FileAbsent $Rapport]} { continue }
        DecortiqueBlast $Rapport "" "" "" lBanqueId lAccess lScore lPN lPartieSegAli
        set Premier [lindex $lBanqueId 0]
        Espionne "$Premier [lindex $lAccess 0] [lindex $lScore 0] [lindex $lPN 0]"
        if {$Premier==""} { continue }
        set Sequence [LaSequenceDuTFAs "/blast/Homo_sapiens_refMrna_hg19" $Premier]
        if { ! [regexp $MotifEntete $Sequence Tout Acc Num]} {
            FaireLire $Sequence
            continue
        }
        set EnteteProt [PremiereLigneDuFichier [GscopeFile $Nom prottfa]]
        # Replace the first word of the protein header by the new prefix.
        regsub {>[^ ]+ } $EnteteProt ">$Nom $Acc $Num RefMrna of " EnteteRefMrna
        regsub $MotifEntete $Sequence "$EnteteRefMrna\n" NouvelleSequence
        Espionne $EnteteRefMrna
        Sauve $NouvelleSequence dans [GscopeFile $Nom $RepRefMrna]
    }
}
# TestCrmMapper --
#   Manual check: sends one fixed query on ucscmouse.refgene through
#   BirdSendQueryUrlAndGetFromUrl and prints the answer with Espionne.
proc TestCrmMapper {} {
    # Earlier trial, kept for reference:
    # set A [BirdSendQueryUrlAndGetFromUrl {select TXSTART from ucscmouse.refgene where chrom='chr11' and 77696429<=txStart and strand='+' order by TXSTART fetch first 1 rows only} {} format=flat]
    # Espionne "$A"
    set Requete {select NAME, TXSTART, TXEND, STRAND from ucscmouse.refgene where chrom='chr11' and 77747137=txStart and strand='+' order by TXEND}
    set Reponse [BirdSendQueryUrlAndGetFromUrl $Requete {} format=plat]
    Espionne $Reponse
}
# TestLocAfter --
#   Manual check of LocAfter on a fixed mouse position (chr11, + strand);
#   the answer is printed with Espionne.
proc TestLocAfter {} {
    Espionne [LocAfter 77696429 Mouse chr11 +]
}
# DrawGenesFromZone_AECRIRE --
#   Unfinished drawing routine (AECRIRE = "to be written"): it fetches the
#   genes of the zone with GenesFromZone, loops over them without doing
#   anything yet, and returns the result of UnCanva.
#   Arguments are passed unchanged to GenesFromZone.
proc DrawGenesFromZone_AECRIRE {Debut Fin Org Chr {Strand ""} {FromWhere ""}} {
    Wup "Attention UCSC compte a partir de 0"
    set Genes [GenesFromZone $Debut $Fin $Org $Chr $Strand $FromWhere]
    foreach UnGene $Genes {
    }
    return [UnCanva]
}
# LocIn --
#   Query the table ucsc<org>.<fromwhere> for the entries of chromosome Chr
#   whose txStart..txEnd range contains Position, ordered by TXSTART.
#   Position  : coordinate to look up
#   Org       : organism; lowercased to build the schema name
#   Chr       : chromosome name as stored in the chrom column
#   Strand    : optional; F and R are sent as + and -, other values as given
#   FromWhere : table name, refGene when empty; lowercased
#   Returns the answer of BirdSendQueryUrlAndGetFromUrl, trimmed of spaces
#   and newlines at both ends.
proc LocIn {Position Org Chr {Strand ""} {FromWhere ""}} {
    Wup "Attention UCSC compte a partir de 0"
    if {$FromWhere==""} { set FromWhere "refGene" }
    set Table "ucsc[string tolower $Org].[string tolower $FromWhere]"
    set Conditions [list "chrom='$Chr'" "txStart<=$Position" "$Position<=txEnd"]
    if {$Strand!=""} {
        switch -exact -- $Strand {
            "F" { set Strand "+" }
            "R" { set Strand "-" }
        }
        lappend Conditions "strand='$Strand'"
    }
    set Query "select NAME, TXSTART, TXEND, STRAND from $Table where [join $Conditions { and }] order by TXSTART"
    # Alternative access kept from the original: Bird $Query "" "flat"
    set Reponse [BirdSendQueryUrlAndGetFromUrl $Query "" "format=plat"]
    return [string trim $Reponse " \n"]
}
# LocAfter --
#   Two-step query on ucsc<org>.<fromwhere>: first find the smallest txStart
#   that is >= Position on chromosome Chr, then return every entry starting
#   exactly there, ordered by TXEND.
#   Position  : coordinate to look up
#   Org       : organism; lowercased to build the schema name
#   Chr       : chromosome name as stored in the chrom column
#   Strand    : optional; F and R are sent as + and -, other values as given
#   FromWhere : table name, refGene when empty; lowercased
#   Returns "" when the first query gives nothing, otherwise the answer of
#   the second query trimmed of spaces and newlines at both ends.
proc LocAfter {Position Org Chr {Strand ""} {FromWhere ""}} {
    Wup "Attention UCSC compte a partir de 0"
    if {$FromWhere==""} { set FromWhere "refGene" }
    set Table "ucsc[string tolower $Org].[string tolower $FromWhere]"
    # The strand restriction is identical in both queries: build it once.
    set ClauseBrin ""
    if {$Strand!=""} {
        switch -exact -- $Strand {
            "F" { set Strand "+" }
            "R" { set Strand "-" }
        }
        set ClauseBrin " and strand='$Strand'"
    }
    set Query "select TXSTART from $Table where chrom='$Chr' and $Position<=txStart$ClauseBrin order by TXSTART fetch first 1 rows only"
    set Premier [BirdSendQueryUrlAndGetFromUrl $Query "" "format=flat"]
    if {$Premier==""} { return "" }
    set Query "select NAME, TXSTART, TXEND, STRAND from $Table where chrom='$Chr' and $Premier=txStart$ClauseBrin order by TXEND"
    set Reponse [BirdSendQueryUrlAndGetFromUrl $Query "" "format=plat"]
    return [string trim $Reponse " \n"]
}
# LocBefore --
#   Two-step query on ucsc<org>.<fromwhere>: first find the largest txEnd
#   that is <= Position on chromosome Chr, then return every entry ending
#   exactly there, ordered by TXSTART.
#   Position  : coordinate to look up
#   Org       : organism; lowercased to build the schema name
#   Chr       : chromosome name as stored in the chrom column
#   Strand    : optional; F and R are sent as + and -, other values as given
#   FromWhere : table name, refGene when empty; lowercased
#   Returns "" when the first query gives nothing, otherwise the answer of
#   the second query trimmed of spaces and newlines at both ends.
proc LocBefore {Position Org Chr {Strand ""} {FromWhere ""}} {
    Wup "Attention UCSC compte a partir de 0"
    if {$FromWhere==""} { set FromWhere "refGene" }
    set Table "ucsc[string tolower $Org].[string tolower $FromWhere]"
    # The strand restriction is identical in both queries: build it once.
    set ClauseBrin ""
    if {$Strand!=""} {
        switch -exact -- $Strand {
            "F" { set Strand "+" }
            "R" { set Strand "-" }
        }
        set ClauseBrin " and strand='$Strand'"
    }
    set Query "select TXEND from $Table where chrom='$Chr' and txEnd<=$Position$ClauseBrin order by TXEND desc fetch first 1 rows only"
    set Premier [BirdSendQueryUrlAndGetFromUrl $Query "" "format=flat"]
    if {$Premier==""} { return "" }
    set Query "select NAME, TXSTART, TXEND, STRAND from $Table where chrom='$Chr' and $Premier=txEnd$ClauseBrin order by TXSTART"
    set Reponse [BirdSendQueryUrlAndGetFromUrl $Query "" "format=plat"]
    return [string trim $Reponse " \n"]
}
# GenesFromZone --
#   Query ucsc<org>.<fromwhere> for the entries of chromosome Chr lying
#   entirely inside Debut..Fin (txstart >= Debut and txend <= Fin), ordered
#   by TXSTART.
#   Strand    : optional; F and R are sent as + and -, other values as given
#   FromWhere : table name, refGene when empty; lowercased
#   Returns a list of strings "TXSTART TXEND STRAND NAME", one for each
#   resultset node of the document element returned by Bird.
proc GenesFromZone {Debut Fin Org Chr {Strand ""} {FromWhere ""}} {
    Wup "Attention UCSC compte a partir de 0"
    if {$FromWhere==""} { set FromWhere "refGene" }
    set Table "ucsc[string tolower $Org].[string tolower $FromWhere]"
    set Conditions [list "chrom='$Chr'" "txstart>=$Debut" "txend<=$Fin"]
    if {$Strand!=""} {
        switch -exact -- $Strand {
            "F" { set Strand "+" }
            "R" { set Strand "-" }
        }
        lappend Conditions "strand='$Strand'"
    }
    set Query "select NAME, TXSTART, TXEND, STRAND from $Table where [join $Conditions { and }] order by TXSTART"
    set Racine [Bird $Query "documentelement"]
    set Resultat {}
    foreach Noeud [$Racine selectNodes "resultset"] {
        # Read the four columns in the order used for the output line.
        set Champs {}
        foreach Colonne {TXSTART TXEND STRAND NAME} {
            lappend Champs [$Noeud selectNodes "string($Colonne)"]
        }
        lappend Resultat [join $Champs " "]
    }
    return $Resultat
}
# GenomeSize --
#   Measure the sequence length of every "normal" FASTA file that
#   BoutADNDeUcsc lists for organism Orga (and optional BigZips directory).
#   Each length is also printed with Espionne while running.
#   Returns a list of lines "<length, right-aligned on 10> <file name>",
#   ending with one line "<sum of lengths> Total".
proc GenomeSize {Orga {BigZips ""}} {
    set Lignes {}
    set Somme 0
    foreach Fichier [BoutADNDeUcsc List normal Fasta $Orga - $BigZips] {
        set Longueur [string length [QueLaSequenceDuFichierTFA $Fichier]]
        Espionne $Longueur
        lappend Lignes [format "%10d %s" $Longueur [file tail $Fichier]]
        incr Somme $Longueur
    }
    lappend Lignes [format "%10d %s" $Somme Total]
    return $Lignes
}
# BoutADNDeUcsc --
#   Multi-purpose access to the UCSC genome FASTA files stored under
#   /genomics/link/UCSCGenomes. What is returned depends on Deb:
#     ""             the usage text (one example call per line)
#     contains Dir   the root directory
#     contains List  a listing selected by keywords found in Deb, Fin, Orient
#                    (links, organisms, bigZips of one organism, all bigZips,
#                    FASTA files); "" when no keyword applies
#     GetFastaFiles  the *.fa files of Orga/BigZips; all of them when Fin
#                    contains "all", otherwise only those without underscore
#     GetFile        the path of the FASTA file of chromosome Chro
#     anything else  the result of BoutADNDuTFA Deb Fin Orient on that file
#   Orga defaults to Homo_sapiens, BigZips to "bigZips".
#   Returns "" (after a FaireLire message) when the FASTA file is not found.
proc BoutADNDeUcsc {{Deb ""} {Fin ""} {Orient ""} {Orga ""} {Chro ""} {BigZips ""}} {
JeMeSignale
Wup "Attention BoutADNdeUCSC compte a partir de 1"
Wup "UCSC compte a partir de 0 pour les starts, et 1 pour les end"
set RepUCSCGenomes "/genomics/link/UCSCGenomes"
# Called without argument: return the usage examples.
if {$Deb==""} {
set Usage {}
lappend Usage "BoutADNDeUcsc Dir"
lappend Usage "BoutADNDeUcsc List of organisms"
lappend Usage "BoutADNDeUcsc List of bigZips"
lappend Usage "BoutADNDeUcsc List of bigZips Homo_sapiens"
lappend Usage "BoutADNDeUcsc List of links"
lappend Usage "BoutADNDeUcsc List normal fasta Homo_sapiens - bigZips200903"
lappend Usage "BoutADNDeUcsc List all fasta Homo_sapiens - bigZips200903"
lappend Usage "BoutADNDeUcsc 23 2789 R Homo_sapiens chr1 bigZips200903"
lappend Usage "BoutADNDeUcsc 1 end F Homo_sapiens chr14 bigZips"
return [join $Usage "\n"]
}
# The keyword tests are case-insensitive substring matches, not equality.
if {[regexp -nocase {Dir} "$Deb"]} {
return $RepUCSCGenomes
}
# Listing mode. The order of the tests below decides which listing wins.
if {[regexp -nocase {List} "$Deb"]} {
set LesBigZips [lsort [glob -nocomplain "$RepUCSCGenomes/*/bigZips*"]]
set LesOrganismes {}
set LesLiens {}
foreach BZ $LesBigZips {
# The organism is the name of the directory holding the bigZips entry.
set Org [file tail [file dirname $BZ]]
lappend LesOrganismes $Org
lappend BigZipsFrom($Org) [file tail $BZ]
# A symbolic link is reported a second time as "<path> links to <target>".
# Appending to LesBigZips here does not lengthen the running foreach,
# whose list argument was substituted before the loop started.
if {[file type $BZ]=="link"} {
set Target [file readlink $BZ]
set Message "$BZ links to $Target"
lappend LesLiens $Message
lappend BigZipsFrom($Org) $Message
lappend LesBigZips $Message
}
}
if {[regexp -nocase "Link" "$Deb$Fin$Orient"]} { return $LesLiens }
set LesOrganismes [lsort -unique $LesOrganismes]
if {[regexp -nocase {List.*Org} "$Deb$Fin$Orient"]} { return $LesOrganismes }
# bigZips of one organism: its name must appear among the arguments.
foreach Org $LesOrganismes {
if {[regexp -nocase $Org "$Deb$Fin$Orient$Orga"] && [regexp -nocase BigZip "$Deb$Fin$Orient"]} {
return $BigZipsFrom($Org)
}
}
if {[regexp -nocase {List.*BigZip} "$Deb$Fin$Orient"]} { return $LesBigZips }
if {[regexp -nocase {List.*Fasta} "$Deb$Fin$Orient"]} {
set AllOrNot "Normal"
if {[regexp -nocase "All" "$Deb$Fin$Orient"]} { set AllOrNot "All" }
# Re-enter with the GetFastaFiles pseudo-command handled further down.
return [BoutADNDeUcsc "GetFastaFiles" $AllOrNot $Orient $Orga $Chro $BigZips]
}
return ""
}
if {$BigZips==""} { set BigZips "bigZips" }
set FichierTFA ""
if {$Orga==""} { set Orga "Homo_sapiens" }
# Common names are mapped to the directory name of the organism.
set OrgaDir $Orga
if {[regexp -nocase "Human" $Orga]} { set OrgaDir "Homo_sapiens"}
if {[regexp -nocase "Mouse" $Orga]} { set OrgaDir "Mus_musculus"}
if {[regexp -nocase "Rat" $Orga]} { set OrgaDir "Rattus_norvegicus"}
if {[regexp -nocase "Worm" $Orga]} { set OrgaDir "Caenorhabditis_elegans"}
if {[regexp -nocase "Danio|Fish" $Orga]} { set OrgaDir "Danio_rerio"}
# NOTE(review): "Drosophilia_melanogaster" is not the usual spelling of the
# species name - confirm that it matches the directory present on disk.
if {[regexp -nocase "Fly|Droso" $Orga]} { set OrgaDir "Drosophilia_melanogaster"}
if {[regexp -nocase {GetFastaFiles} $Deb]} {
set LesFasta [lsort -dictionary [glob -nocomplain "$RepUCSCGenomes/$OrgaDir/$BigZips/*.fa"]]
if {[regexp -nocase "all" $Fin]} { return $LesFasta }
# "Normal" listing: leave out every file whose name contains an underscore.
set LesBonFasta {}
foreach F $LesFasta {
if {[regexp "_" [file tail $F]]} { continue }
lappend LesBonFasta $F
}
return $LesBonFasta
}
set FichierTFA "$RepUCSCGenomes/$OrgaDir/$BigZips/$Chro.fa"
# Second chance: the file may sit in a sub-directory named after the
# chromosome without its "chr" prefix.
if {[FileAbsent $FichierTFA]} {
regsub "chr" $Chro "" Rep
regsub "/$BigZips/" $FichierTFA "/$BigZips/$Rep/" FichierTFA
}
if {[FileAbsent $FichierTFA]} {
FaireLire "I cannot find $FichierTFA for $Orga $Chro $BigZips"
return ""
}
if {$Deb=="GetFile"} { return $FichierTFA }
return [BoutADNDuTFA $Deb $Fin $Orient $FichierTFA]
}
# LocaliseDansLaListe --
#   Locate Pos among the increasing limits of Liste and return the pair
#   "lower upper" of consecutive limits with lower <= Pos < upper.
#   Liste : list of limits (at least two are needed to form an interval)
#   Pos   : position to locate
#   Returns
#     "-1 -1"  when Pos is negative
#     "-2 -2"  when Pos lies beyond the last limit, or when Liste holds
#              fewer than two limits
#     the last interval when Pos is exactly the last limit
#   A Pos below the first limit is reported in the first interval.
proc LocaliseDansLaListe {Liste Pos} {
    if {$Pos<0} { return "-1 -1" }
    # Without this guard an empty or one-element list (for instance the
    # limits of an unknown chromosome) reached the final test with the loop
    # variable F never set, which raised a Tcl error.
    if {[llength $Liste]<2} { return "-2 -2" }
    set D [lindex $Liste 0]
    set OldD $D
    foreach F [lrange $Liste 1 end] {
        if {$Pos>=$F} { set OldD $D ; set D $F ; continue }
        return "$D $F"
    }
    # Here F is the last limit: a position exactly on it belongs to the
    # last interval.
    if {$Pos==$F} { return "$OldD $F" }
    return "-2 -2"
}
# CytoBandUcsc --
#   Cached access to the UCSC cytoBand tables of human and mouse, kept in the
#   global array CytoBandUcsc and loaded on the first request that misses.
#   Usage (see the Wup lines below):
#     CytoBandUcsc Mouse 4qA3           start and stop of the band
#     CytoBandUcsc Mouse chr4 17555585  band containing the position
#     CytoBandUcsc Mouse Limits Of 4    all limits of the chromosome
#     CytoBandUcsc ListOf Organisms     available organisms
#   Quoi, Quoi2 and Quoi3 are joined with "-" into a single key.
#   Returns "" for an unknown key. When OnTraiteUCSCGenomes is false the
#   request is forwarded through QuestionDeScience.
#   Stored per band: "start end-1"; the last band of each chromosome is stored
#   as "start end", and its end closes the Limits-Of list of that chromosome.
proc CytoBandUcsc {{Qui ""} {Quoi ""} {Quoi2 ""} {Quoi3 ""}} {
    Wup "CytoBandUcsc Mouse 4qA3 returns the start and stop 17555508 27993917"
    Wup "CytoBandUcsc Mouse chr4 17555585 returns the cytoband 4qA3"
    Wup "CytoBandUcsc Mouse chr4 27993918 returns the cytoband 4qA4"
    Wup "CytoBandUcsc Mouse Limits Of 4 return all limits "
    Wup "CytoBandUcsc ListOf Organisms return available organisms"
    if {! [OnTraiteUCSCGenomes]} {
        return [QuestionDeScience "UCSCGenomes" "ret CytoBandUcsc $Qui $Quoi $Quoi2 $Quoi3"]
    }
    global CytoBandUcsc
    if {$Quoi3!=""} { set Quoi2 "$Quoi2-$Quoi3" ; set Quoi3 "" }
    if {$Quoi2!=""} { set Quoi "$Quoi-$Quoi2" ; set Quoi2 "" }
    if {$Qui==""} {
        set Qui "ListOf"
        set Quoi "Organisms"
    } else {
        if {[regexp -nocase {Homo sapiens|Human} $Qui]} { set Qui "Human" }
        if {[regexp -nocase {Mus musculus|Mouse} $Qui]} { set Qui "Mouse" }
        set O $Qui
    }
    regsub "chr" $Quoi "" Quoi
    # "chromosome-position" is a localisation request.
    if {[regexp -nocase {^[0-9A-Z]+\-[0-9]+$} $Quoi]} { set Quoi "Loc-$Quoi" }
    if {[info exists CytoBandUcsc($Qui,$Quoi)]} { return $CytoBandUcsc($Qui,$Quoi) }
    if {[regexp -nocase {^(Loc\-?[0-9A-Z]+)\-([0-9]+)$} $Quoi Match Chro Pos]} {
        regsub {Loc\-?} $Chro "" Chro
        if {[lsearch [CytoBandUcsc ListOf Organisms] $O]<0} { return "" }
        set Limites [CytoBandUcsc $Qui "Limits-Of-$Chro"]
        # Unknown chromosome: there is no interval to search.
        if {[llength $Limites]<2} { return "" }
        set Bande [LocaliseDansLaListe $Limites $Pos]
        regsub " " $Bande "-" Bande
        return [CytoBandUcsc $O $Chro $Bande]
    } else {
        if {[info exists CytoBandUcsc("EstCharge")]} { return "" }
    }
    set CytoBandUcsc("EstCharge") 1
    set FichierCytoBandHuman "/genomics/link/UCSCGenomes/Homo_sapiens/database/cytoBand.txt"
    set FichierCytoBandMouse "/genomics/link/UCSCGenomes/Mus_musculus/database/cytoBand.txt"
    # One stream of lines; an organism name introduces each file.
    set LesLignes {}
    lappend LesLignes "Homo sapiens"
    LConcat LesLignes [LesLignesDuFichier $FichierCytoBandHuman]
    lappend LesLignes "Mus musculus"
    LConcat LesLignes [LesLignesDuFichier $FichierCytoBandMouse]
    # EnCours is 1 while a chromosome is open, that is while D, F, NChro and
    # NChroCyto describe the last band read for the current organism.
    set EnCours 0
    set OldChro ""
    foreach Ligne $LesLignes {
        if { [regexp "^chrom" $Ligne]} { continue }
        if { ! [regexp "^chr" $Ligne]} {
            # Close the chromosome in progress under the organism it belongs
            # to, and forget it: otherwise the first chromosome of the next
            # organism would flush it a second time under the new organism.
            if {$EnCours} {
                lappend CytoBandUcsc($O,Limits-Of-$NChro) $F
                set CytoBandUcsc($O,$NChroCyto) "$D $F"
            }
            set EnCours 0
            set OldChro ""
            if {$Ligne=="Homo sapiens"} {
                set O "Human"
                lappend CytoBandUcsc(ListOf,Organisms) $O
            }
            if {$Ligne=="Mus musculus"} {
                set O "Mouse"
                lappend CytoBandUcsc(ListOf,Organisms) $O
            }
            continue
        }
        # Read into fresh variables: D and F must still describe the previous
        # band when the previous chromosome is closed just below. Scanning
        # straight into D and F gave the last band of every chromosome the
        # coordinates of the first band of the next one.
        scan $Ligne "%s %s %s %s %s" Chro NouveauD NouveauF Cyto G
        if {$EnCours && $Chro!=$OldChro} {
            lappend CytoBandUcsc($O,Limits-Of-$NChro) $F
            set CytoBandUcsc($O,$NChroCyto) "$D $F"
        }
        set D $NouveauD
        set F $NouveauF
        set OldChro $Chro
        regsub "chr" $Chro "" NChro
        set NChroCyto "$NChro$Cyto"
        set CytoBandUcsc($O,$NChroCyto) "$D [expr {$F-1}]"
        set CytoBandUcsc($O,$NChro-$D-$F) $NChroCyto
        lappend CytoBandUcsc($O,Limits-Of-$NChro) $D
        lappend CytoBandUcsc($O,List-Of-CytoBands) $NChroCyto
        set EnCours 1
    }
    # Close the very last chromosome.
    if {$EnCours} {
        lappend CytoBandUcsc($O,Limits-Of-$NChro) $F
        set CytoBandUcsc($O,$NChroCyto) "$D $F"
    }
    return [CytoBandUcsc $Qui $Quoi]
}
# LocInBetween --
#   Locate Position (or the range Position..FinPosition) among the
#   "start end ..." entries of one chromosome, by bisection on the list cached
#   in the global array LocInBetween under the key FromWhere+Org+Chr.
#   Position    : coordinate to locate
#   Org, Chr    : organism and chromosome, used to build the cache key and
#                 passed to LocUcscRefSeq or LocAffy
#   FromWhere   : "Ucsc" (default when empty) or "Affy"
#   FinPosition : end of the range; defaults to Position
#   Returns one of
#     "Error NoStartEndNameAvailable"   the cached list is empty
#     "Error NoGenome"                  first or last entry is empty
#     "Outside <first> <last>"          Position outside the whole list
#     a sorted, duplicate-free list of "In <entry>" strings
#     "Between <before> <after>"        no entry overlaps the range
#   NOTE(review): with a FromWhere other than Ucsc or Affy nothing is cached
#   and the first read of the cache entry below fails - confirm the callers.
#   NOTE(review): the bisection presumably needs the list sorted by start -
#   verify what LocUcscRefSeq and LocAffy return.
proc LocInBetween {Position Org Chr {FromWhere ""} {FinPosition ""}} {
# When LocUcsc is used, Org=Mouse
# When LocUcscRefSeq is used, Org=mm9
# NB: for Between, when several NM overlap, only one of them is kept in the result (at random). Later on, consider all the NM, or the longest one?
Wup "Attention UCSC compte a partir de 0"
global LocInBetween
# Number of neighbours inspected on each side of the bisection point.
set NDeChaqueCote 20
if {$FinPosition==""} { set FinPosition $Position }
if {$FromWhere==""} { set FromWhere "Ucsc" }
set Foc "$FromWhere$Org$Chr"
# Fill the cache on first use of this source, organism and chromosome.
if { ! [info exists LocInBetween($Foc)]} {
if {$FromWhere=="Ucsc"} {
#set LocInBetween($Foc) [LocUcsc ListOf$Org$Chr "StartEndName"]
set LocInBetween($Foc) [LocUcscRefSeq $Org ListOf$Chr "StartEndName"]
}
if {$FromWhere=="Affy"} {
set LocInBetween($Foc) [LocAffy ListOf$Org$Chr "StartEndName"]
}
}
# D and F are the bounds (list indices) of the bisection.
set D 0
set F [expr [llength $LocInBetween($Foc)] - 1]
if {$F<$D} { return "Error NoStartEndNameAvailable" }
set SENDeb [lindex $LocInBetween($Foc) 0]
set SENFin [lindex $LocInBetween($Foc) end]
if {$SENDeb==""||$SENFin==""} { return "Error NoGenome" }
if {$Position<[lindex $SENDeb 0] || $Position>[lindex $SENFin 1]} {
return "Outside $SENDeb $SENFin"
}
# Closest entries known so far before and after Position.
set OldAvant $SENDeb
set OldApres $SENFin
while 1 {
#Espionne "$OldAvant $OldApres"
#Espionne "$D $F"
if {$F<$D} {
#Espionne "$F $D"
set StatusPremier "Between $OldAvant $OldApres"
break
}
set M [expr ($D+$F)/2]
set SEN [lindex $LocInBetween($Foc) $M]
#Espionne "$M $SEN"
set Start [lindex $SEN 0]
set End [lindex $SEN 1]
# Position falls inside the middle entry: collect it together with the
# neighbours that overlap Position..FinPosition, then return.
if {$Start<=$Position && $Position<=$End} {
set LesIns [list "In $SEN"]
#Espionne "$M $SEN"
for {set I [expr -$NDeChaqueCote]} { $I<=$NDeChaqueCote} { incr I } {
set MM [expr $M + $I]
set SEN [lindex $LocInBetween($Foc) $MM]
# An index outside the list gives an empty entry.
if {$SEN==""} { continue }
set Start [lindex $SEN 0]
set End [lindex $SEN 1]
if {$FinPosition<$Start || $End<$Position} { continue }
#Espionne "$MM $SEN"
lappend LesIns "In $SEN"
}
set LesIns [lsort -unique $LesIns]
return $LesIns
}
# Only one candidate left and it does not contain Position.
if {$D==$F} {
if {$Position<$Start} {set OldApres $SEN}
if {$End<$Position} {set OldAvant $SEN}
set StatusPremier "Between $OldAvant $OldApres"
break
}
# Narrow the search to the half that can still contain Position.
if {$Position<$Start} {set OldApres $SEN; set F [expr $M -1]}
if {$End<$Position} {set OldAvant $SEN; set D [expr $M +1]}
}
# The loop above ended with Between: still check whether some entries around the last middle index overlap the range
set LesIns {}
for {set I [expr -$NDeChaqueCote]} { $I<=$NDeChaqueCote} { incr I } {
set MM [expr $M + $I]
set SEN [lindex $LocInBetween($Foc) $MM]
if {$SEN==""} { continue }
set Start [lindex $SEN 0]
set End [lindex $SEN 1]
if {$FinPosition<$Start || $End<$Position} { continue }
#Espionne "$MM $SEN"
lappend LesIns "In $SEN"
}
set LesIns [lsort -unique $LesIns]
if {$LesIns!={}} { return $LesIns }
return $StatusPremier
}
# LocUcscEssai --
#   Trial version of the UCSC localisation accessor: at this stage it only
#   knows the list of column names of the knownGene table.
#   Qui, Quoi : lookup key; both empty means ListOf Header, an empty Quoi
#               means "line"
#   Returns the cached value for the key, or "" when the key is unknown.
proc LocUcscEssai {{Qui ""} {Quoi ""}} {
    # The cache has to be global: as a local array it was empty again in the
    # self-call that ends this procedure, so that call filled it, called
    # itself, and so on until Tcl's nesting limit was reached.
    global LocUcscEssai
    Wup "Attention UCSC compte a partir de 0"
    # Location of the table this trial is meant to read; not read yet.
    set RepertoireUcsc "/genomics/link/UCSCGenomes"
    set FichierLocUcscMouse "$RepertoireUcsc/Mus_musculus/database/knownGene.txt"
    if {$Qui==""} { set Qui "ListOf" ; set Quoi "Header" }
    if {$Quoi==""} { set Quoi "line" }
    if {[info exists LocUcscEssai($Qui,$Quoi)]} { return $LocUcscEssai($Qui,$Quoi) }
    if {[info exists LocUcscEssai("EstCharge")]} { return "" }
    set LocUcscEssai("EstCharge") 1
    set LesTetes [list KgId chrom strand txStart txEnd cdsStart cdsEnd exonCount exonStarts exonEnds proteinAcc alignID]
    set LocUcscEssai(ListOf,Header) $LesTetes
    # Answer through the cache that has just been filled.
    return [LocUcscEssai $Qui $Quoi]
}
# LocUcsc --
#   Cached access to one refGene.txt table (human hg19 by default, mouse mm9
#   on request), kept in the global array LocUcsc and loaded on the first
#   request that misses.
#   Qui, Quoi : lookup key; both empty means ListOf Header, an empty Quoi
#               means "line"
#   Organism  : third argument; a value starting with H or M (any case)
#               selects the human or mouse table, and drops the whole cache
#               when the other table was selected before
#   Returns the cached value for (Qui,Quoi); failing that, Qui is taken as a
#   proteinAcc and the value of its recorded KgId entry is returned; ""
#   when nothing is known.
proc LocUcsc {{Qui ""} {Quoi ""} {Organism ""}} {
    global LocUcsc
    Wup "Attention UCSC compte a partir de 0 pour les start MAIS a partir de 1 pour les end !!!!!!!!!!!!!!!!!!!!!!!!"
    Wup "ATTENTION depuis 20200115 on proend Homom_sapiens par defaut"
    Wup "Pour la souris faire d'abord LocUcsc Mus_musculus"
    set RepertoireUcsc "/genomics/link/UCSCGenomes"
    # Former mouse table: "$RepertoireUcsc/Mus_musculus/database/mm9/last/knownGene.txt"
    set FichierLocUcscMouse "$RepertoireUcsc/Mus_musculus/database/mm9/last/refGene.txt"
    set FichierLocUcscHuman "$RepertoireUcsc/Homo_sapiens/database/hg19/last/refGene.txt"
    #rR The default organism can be repositioned
    foreach {Motif Fichier} [list "^H" $FichierLocUcscHuman "^M" $FichierLocUcscMouse] {
        if { ! [regexp -nocase $Motif $Organism]} { continue }
        # Switching to the other table invalidates everything loaded so far.
        if {[info exists LocUcsc(organism,file)] && $LocUcsc(organism,file)!=$Fichier} {
            unset LocUcsc
        }
        set LocUcsc(organism,file) $Fichier
    }
    if { ! [info exists LocUcsc(organism,file)]} { set LocUcsc(organism,file) $FichierLocUcscHuman }
    if {$Qui==""} { set Qui "ListOf" ; set Quoi "Header" }
    if {$Quoi==""} { set Quoi "line" }
    if {[info exists LocUcsc($Qui,$Quoi)]} { return $LocUcsc($Qui,$Quoi) }
    if {[info exists LocUcsc("EstCharge")]} {
        # Already loaded: last chance, go through the KgId recorded for Qui.
        if { ! [info exists LocUcsc($Qui,KgId)]} { return "" }
        set Identifiant $LocUcsc($Qui,KgId)
        if {[info exists LocUcsc($Identifiant,$Quoi)]} { return $LocUcsc($Identifiant,$Quoi) }
        return ""
    }
    set LocUcsc("EstCharge") 1
    set LesTetes [list KgId NM chrom strand txStart txEnd cdsStart cdsEnd exonCount exonStarts exonEnds Zero proteinAcc alignID]
    set LocUcsc(ListOf,Header) $LesTetes
    foreach Ligne [LesLignesDuFichier $LocUcsc(organism,file)] {
        #rR A bit clumsy: the separator is given explicitly so that two blanks are not reduced to one
        set LesMots [LesMotsDeLaLigne $Ligne " "]
        set Identifiant [lindex $LesMots 0]
        lappend LocUcsc(ListOf,KgId) $Identifiant
        # Store every column under the identifier of the line.
        foreach Tete $LesTetes Mot $LesMots {
            set LocUcsc($Identifiant,$Tete) $Mot
        }
        set Proteine $LocUcsc($Identifiant,proteinAcc)
        set Chromosome $LocUcsc($Identifiant,chrom)
        lappend LocUcsc(ListOf,proteinAcc) $Proteine
        lappend LocUcsc($Proteine,KgId) $Identifiant
        set LocUcsc($Identifiant,line) $Ligne
        lappend LocUcsc(ListOf,chrom) $Chromosome
        # Retrieved later with: LocUcsc ListOfchr1 StartEndName
        lappend LocUcsc(ListOf$Chromosome,StartEndName) [list \
            $LocUcsc($Identifiant,txStart) \
            $LocUcsc($Identifiant,txEnd) \
            $LocUcsc($Identifiant,strand) \
            $Identifiant]
    }
    set LocUcsc(ListOf,chrom) [lsort -dictionary -unique $LocUcsc(ListOf,chrom)]
    return [LocUcsc $Qui $Quoi]
}
# LocUcscOld --
#   Former version of LocUcsc: loads the human and the mouse table together
#   (knownGene.txt by default, or the files named in
#   LocUcsc(FichierLocUcscHuman) and LocUcsc(FichierLocUcscMouse)) into the
#   global array LocUcsc.
#   Qui, Quoi : lookup key; both empty means ListOf Header, an empty Quoi
#               means "line"
#   Returns the cached value for the key, or "" when the key is unknown.
#   An access already seen is stored again as access-2, access-3, ... and
#   listed under PolyLocalisationOf,<access> and ListOf,PolyLocalisation.
#   The column layout (refGene or knownGene) is chosen from the name of the
#   mouse file only.
proc LocUcscOld {{Qui ""} {Quoi ""}} {
    Wup "Attention UCSC compte a partir de 0"
    global LocUcsc
    set RepertoireUcsc "/genomics/link/UCSCGenomes"
    if {$Qui==""} { set Qui "ListOf" ; set Quoi "Header" }
    if {$Quoi==""} { set Quoi "line" }
    if {[info exists LocUcsc($Qui,$Quoi)]} { return $LocUcsc($Qui,$Quoi) }
    if {[info exists LocUcsc("EstCharge")]} { return "" }
    set LocUcsc("EstCharge") 1
    if {[info exists LocUcsc(FichierLocUcscHuman)]} {
        set FichierLocUcscHuman $LocUcsc(FichierLocUcscHuman)
    } else {
        set FichierLocUcscHuman "$RepertoireUcsc/Homo_sapiens/database/knownGene.txt"
    }
    if {[info exists LocUcsc(FichierLocUcscMouse)]} {
        set FichierLocUcscMouse $LocUcsc(FichierLocUcscMouse)
    } else {
        set FichierLocUcscMouse "$RepertoireUcsc/Mus_musculus/database/knownGene.txt"
    }
    set OnTraiteRefGene [regexp "refGene" [file tail $FichierLocUcscMouse]]
    if {$OnTraiteRefGene} {
        set EnteteCommuneRefGene "bin name chrom strand txStart txEnd cdsStart cdsEnd exonCount exonStarts exonEnds id name2 cdsStartStat cdsEndStat exonFrames"
        set LesTetes [split $EnteteCommuneRefGene " "]
    } else {
        set EnteteCommuneKnownGene "KgId chrom strand txStart txEnd cdsStart cdsEnd exonCount exonStarts exonEnds proteinAcc alignID"
        set LesTetes [split $EnteteCommuneKnownGene " "]
    }
    set LocUcsc(ListOf,Header) $LesTetes
    # One stream of lines; the words Human and Mouse introduce each file.
    foreach Ligne [concat \
            [list "Human"] [LesLignesDuFichier $FichierLocUcscHuman] \
            [list "Mouse"] [LesLignesDuFichier $FichierLocUcscMouse]] {
        if {[regexp {^(Human|Mouse)$} $Ligne]} { set HumMou $Ligne ; continue }
        set LesMots [LesMotsDeLaLigne $Ligne]
        if {$OnTraiteRefGene} {
            set Name [lindex $LesMots 1]
        } else {
            set Name [lindex $LesMots 0]
        }
        # Access already seen: store this line as Name-2, Name-3, ...
        if {[info exists LocUcsc($Name,name)]} {
            set ReferenceName $Name
            set Ieme 1
            set NewName "$Name-[incr Ieme]"
            while {[info exists LocUcsc($NewName,name)]} { set NewName "$Name-[incr Ieme]" }
            set LocUcsc($NewName,ReferenceName) $ReferenceName
            set Name $NewName
            lappend LocUcsc(PolyLocalisationOf,$ReferenceName) $NewName
            lappend LocUcsc(ListOf,PolyLocalisation) $NewName
        }
        lappend LocUcsc(ListOf$HumMou,Access) $Name
        lappend LocUcsc(ListOf,Access) $Name
        # Each column also becomes a local variable named after its header.
        foreach Tete $LesTetes Mot $LesMots {
            set $Tete $Mot
            set LocUcsc($Name,$Tete) $Mot
        }
        if {$OnTraiteRefGene} {
            lappend LocUcsc(ListOf,name2) $name2
            lappend LocUcsc($name2,line) $Name
        } else {
            # The knownGene header above names this column proteinAcc, so no
            # variable proteinID was ever set and reading it raised an error.
            # The array keys keep their proteinID spelling.
            lappend LocUcsc(ListOf,proteinID) $proteinAcc
            lappend LocUcsc($proteinAcc,line) $Name
        }
        set LocUcsc($Name,line) $Ligne
        lappend LocUcsc(ListOf$HumMou,chrom) $chrom
        set StartEndName [list $txStart $txEnd $strand $Name]
        lappend LocUcsc(ListOf$HumMou$chrom,StartEndName) $StartEndName
    }
    foreach Org [list "Human" "Mouse"] {
        # Nothing was loaded for this organism (empty table).
        if { ! [info exists LocUcsc(ListOf$Org,chrom)]} { continue }
        set LocUcsc(ListOf$Org,chrom) [lsort -unique $LocUcsc(ListOf$Org,chrom)]
        foreach Chr $LocUcsc(ListOf$Org,chrom) {
            set LocUcsc(ListOf$Org$Chr,StartEndName) [lsort -integer -index 0 $LocUcsc(ListOf$Org$Chr,StartEndName)]
        }
    }
    # Answer through this procedure's own lookup. The call used to go to
    # LocUcsc, which is now a different procedure sharing the same array and
    # recording its own table choice in it.
    return [LocUcscOld $Qui $Quoi]
}
# NotationUCSC --
#   Translate between the UCSC chromosome names (chr1, chr1_random, ...) and
#   the access notation UCSC:HSnn (UCSC:HS01, UCSC:HS01rand, ...).
#   Qui  : a chromosome name, an access, or ListOf; only the first word is
#          used, a ">" is removed, and the lookup ignores case
#   Quoi : UCSC (chromosome name), IGBMC (access) or Header (FASTA header);
#          when empty, IGBMC is used for a Qui starting with chr and UCSC
#          otherwise
#   Returns the requested notation, the full list for ListOf, or "" when Qui
#   is unknown. The table is kept in the global array NotationUCSC.
proc NotationUCSC {Qui {Quoi ""}} {
    scan $Qui "%s" Qui
    regsub ">" $Qui "" Qui
    set QUI [string toupper $Qui]
    if {$Quoi==""} {
        if {[regexp "^CHR" $QUI]} {
            set Quoi "IGBMC"
        } else {
            set Quoi "UCSC"
        }
    }
    global NotationUCSC
    if {[info exists NotationUCSC($QUI,$Quoi)]} {return $NotationUCSC($QUI,$Quoi)}
    if {[info exists NotationUCSC("EstCharge")]} {return ""}
    set NotationUCSC("EstCharge") 1
    set L {
        chr1 chr1_random chr2 chr2_random chr3 chr3_random chr4 chr4_random
        chr5 chr5_random chr6 chr6_random chr7 chr7_random chr8 chr8_random
        chr9 chr9_random chr10 chr10_random chr11 chr12 chr12_random
        chr13 chr13_random chr14 chr15 chr15_random chr16 chr16_random
        chr17 chr17_random chr18 chr18_random chr19 chr19_random chr20
        chr21 chr22 chr22_random
        chrX chrX_random chrY chrM chr6_hla_hap1 chr6_hla_hap2
    }
    foreach Chr $L {
        set CHR [string toupper $Chr]
        # Build the access: chr -> HS, suffixes shortened, number on 2 digits.
        set K $Chr
        regsub "chr" $K "HS" K
        regsub "_random" $K "rand" K
        regsub "_hla_hap1" $K "hap1" K
        regsub "_hla_hap2" $K "hap2" K
        if {[regexp {HS([0-9]+)} $K Match n]} {
            set NN [format "%2.2d" $n]
            regsub "$n" $K $NN K
        }
        set Access "UCSC:$K"
        set Header ">$Access Homo sapiens complete genome $Access Build_35_hg17_May_2004"
        # Lookups uppercase the query, so the access must also be stored in
        # upper case: keys such as UCSC:HS01rand could never be found
        # otherwise. The mixed-case key is kept for direct array readers.
        foreach Cle [list $CHR $Access [string toupper $Access]] {
            set NotationUCSC($Cle,UCSC) $Chr
            set NotationUCSC($Cle,IGBMC) $Access
            set NotationUCSC($Cle,Header) $Header
        }
        lappend NotationUCSC(LISTOF,UCSC) $Chr
        lappend NotationUCSC(LISTOF,IGBMC) $Access
        lappend NotationUCSC(LISTOF,Header) $Header
    }
    return [NotationUCSC $Qui $Quoi]
}
# HumanGenomeUcsc --
#   One-shot conversion: copy HumanUCSC_Build37_hg17_May2004_origine to
#   HumanUCSC_Build37_hg17_May2004 (both in the current directory), replacing
#   each header line ">name ..." by the header NotationUCSC gives for that
#   name. Header lines whose name NotationUCSC does not know are copied as is.
#   Terminates the program (exit) once the copy is done.
proc HumanGenomeUcsc {} {
    set I [open "HumanUCSC_Build37_hg17_May2004_origine" "r"]
    set O [open "HumanUCSC_Build37_hg17_May2004" "w"]
    while {[gets $I Ligne]>=0} {
        if {[regexp {>([^ ]+)} $Ligne Match X]} {
            # Call the procedure: the former "set Ligne NotationUCSC(...)"
            # wrote that literal text into the output instead of the header.
            set Entete [NotationUCSC $X Header]
            if {$Entete!=""} { set Ligne $Entete }
        }
        puts $O $Ligne
    }
    close $I
    close $O
    exit
}
Index by: file name |
procedure name |
procedure call |
annotation
File generated 2022-04-05 at 12:55.