Index by: file name | procedure name | procedure call | annotation
gscope_ucsc.tcl (annotations | original source)

#rR Debut de gscope_ucsc.tcl
#rR Attention UCSC compte a partir de 0


proc ConcordanceNmNuctfa {} {
    # For every name of [ListeDesPABs], compare the second word of the first
    # line of its nuctfaRefMrna file with the fifth word of the first line of
    # its LocCdsOnRefMrna file, and report the outcome through Espionne.
    # A missing file counts as "NotFound" on the corresponding side.
    foreach Nom [ListeDesPABs] {
        set FichierNuc [GscopeFile $Nom nuctfaRefMrna]
        if { ! [FileAbsent $FichierNuc]} {
            # Header words: the first one is ignored, the second one is kept.
            scan [PremiereLigneDuFichier $FichierNuc] "%s %s" Premier MyRefMrna
        } else {
            set MyRefMrna "NotFound"
        }

        set FichierLoc [GscopeFile $Nom LocCdsOnRefMrna]
        if { ! [FileAbsent $FichierLoc]} {
            set MotsLoc [LesMotsDuTexte [PremiereLigneDuFichier $FichierLoc]]
            set NmLoc [lindex $MotsLoc 4]
        } else {
            set NmLoc "NotFound"
        }

        if {$MyRefMrna==$NmLoc} {
            Espionne "$Nom $MyRefMrna"
        } else {
            Espionne "$Nom $MyRefMrna!=$NmLoc"
        }
    }
}

proc LocaliseCdsOnMyRefMrna {{Liste ""}} {
    #rR Prerequisite: the tblastn runs of the prottfa against refMrna have been done.
    #
    # For every name of Liste (default: [ListeDesPABs]) that has a tblastn
    # report but no file yet in LocCdsOnRefMrna, parse the first hit of the
    # report and save one summary line:
    #   Nom DebQuery - FinQuery NM DebSbjct - FinSbjct Frame IdentUp/IdentDown PId% Score ScoreExact Expect
    # Reading stops at the next "Score ", ">" or "Database" line, so only the
    # first alignment of the first hit is localised.
    # A report without Score or Identities line stops the program (FaireLire, exit).

    set DirName "LocCdsOnRefMrna"
    file mkdir [RepertoireDuGenome]/$DirName

    if {$Liste==""} { set Liste [ListeDesPABs] }
    foreach Nom $Liste {
        set LocFile [GscopeFile $Nom $DirName]
        if {[file exists $LocFile]} { continue }
        set FiTBlastN [GscopeFile $Nom "tblastn_Homo_sapiens_refMrna_hg19"]
        if {[FileAbsent $FiTBlastN]} { continue }

        set f [open $FiTBlastN "r"]
        set NoHitsFound 0
        # Skip the preamble up to the first hit header (a line starting with ">").
        while {[gets $f Ligne]>=0} {
            if {[regexp "No hits found" $Ligne]} { set NoHitsFound 1 ; break }
            if {[string index $Ligne 0]!=">"} { continue }
            scan $Ligne %s NM
            set NM [string trim $NM ">"]
            # The line after the header carries the subject length; it has to be
            # consumed so that the following reads stay aligned with the report.
            gets $f Ligne
            set LongNM [IntegerApres "Length =" dans $Ligne]
            break
        }
        # Release the channel before skipping: one leaked channel per hitless
        # report would otherwise accumulate over a long list.
        if {$NoHitsFound} { close $f ; continue }

        gets $f Ligne
        gets $f Ligne

        if { ! [regexp {Score = +([0-9\.\+\-e]+) bits \(([0-9]+)\)\, Expect[^ ]* = ([^,]+),} $Ligne Match Score ScoreExact Expect]} { FaireLire "pas de Score dans $Ligne"; exit }
        # BLAST prints very small expectations as "e-123": give them a mantissa.
        if {[regexp "^e" $Expect]} { set Expect "1.$Expect"}
        # A score written in exponent notation is replaced by a large sentinel.
        if {[regexp "e" $Score]} { set Score 9999 }

        gets $f Ligne
        if { ! [regexp {Identities = ([0-9]+)/([0-9]+) \(([0-9]+)%\)} $Ligne Match IdentUp IdentDown PId]} { FaireLire "pas de Identities dans $Ligne"; exit }

        gets $f Ligne
        set Frame $Ligne

        # First start and last end seen on the Query: and Sbjct: lines.
        set DebQuery ""
        set DebSbjct ""
        set FinQuery ""
        set FinSbjct ""
        while {[gets $f Ligne]>=0} {
            if {[regexp {Score } $Ligne] || [regexp {^>} $Ligne] || [regexp "Database" $Ligne]} { break }
            if {[regexp {^Query: +([0-9]+)[^0-9]+([0-9]+)$} $Ligne Match DQ FQ]} {
                if {$DebQuery==""} { set DebQuery $DQ }
                set FinQuery $FQ
            }
            if {[regexp {^Sbjct: +([0-9]+)[^0-9]+([0-9]+)$} $Ligne Match DS FS]} {
                if {$DebSbjct==""} { set DebSbjct $DS }
                set FinSbjct $FS
            }
        }
        close $f
        # NOTE(review): %5d needs an integer Score; a fractional bit score
        # (for instance 45.1) would make format fail - confirm against real reports.
        set Message [format "%s %5d - %5d %-10s %5d - %5d %s %4d/%4d %3d%% %5d %5d %f" $Nom $DebQuery $FinQuery $NM $DebSbjct $FinSbjct $Frame $IdentUp $IdentDown $PId $Score $ScoreExact $Expect]
        Espionne $Message
        Sauve $Message dans $LocFile
    }
}

proc LocaliseMyRefMrna {{Liste ""}} {
    #rR Prerequisite: the blastn runs of the refMrna against the human genome have been done.
    #
    # For every name of Liste (default: [ListeDesPABs]) that has a blastn report
    # but no file yet in LocRefMrna, collect the aligned segments of the first
    # hit (all its alignments, up to the next ">" or "Database" line), drop the
    # segments whose query range is contained in one already kept, sort them by
    # query position and save one line per segment:
    #   Nom DQ - FQ Chr DS - FS Strand IdentUp/IdentDown PId% Score ScoreExact Expect
    # Strand, identities, score and expectation are those of the first alignment.
    # A report without Score or Identities line is signalled (FaireLire) and skipped.

    set DirName "LocRefMrna"
    file mkdir [RepertoireDuGenome]/$DirName

    if {$Liste==""} { set Liste [ListeDesPABs] }

    foreach Nom $Liste {
        set LocFile [GscopeFile $Nom $DirName]
        if {[file exists $LocFile]} { continue }
        # The lookup arrays are per entry: forget what the previous one left.
        foreach Tableau {DSdeDQ FSdeFQ DejaVu} {
            if {[info exists $Tableau]} { unset $Tableau }
        }
        set FiBlastN [GscopeFile $Nom "blastnHomo_sapiens_2009"]
        if {[FileAbsent $FiBlastN]} { continue }
        Espionne $Nom
        set f [open $FiBlastN "r"]
        set NoHitsFound 0
        # Skip the preamble up to the first hit header (a line starting with ">").
        while {[gets $f Ligne]>=0} {
            if {[regexp "No hits found" $Ligne]} { set NoHitsFound 1 ; break }
            if {[string index $Ligne 0]!=">"} { continue }
            set Chr [string trim [string range $Ligne 1 end]]
            # Consume the "Length =" line so the following reads stay aligned.
            gets $f Ligne
            set LongChr [IntegerApres "Length =" dans $Ligne]
            break
        }
        if {$NoHitsFound} { close $f ; continue }
        gets $f Ligne
        gets $f Ligne
        set LesDebQuery {}
        set LesFinQuery {}
        set LesDebSbjct {}
        set LesFinSbjct {}
        set LesDQDejaVu {}
        set LesFQDejaVu {}
        set Skip 0
        # This loop runs once; it only exists so that "break" can leave the
        # parsing early when the report is malformed.
        while 1 {
            if { ! [regexp {Score = +([0-9\.\+\-e]+) bits \(([0-9]+)\)\, Expect = (.+)$} $Ligne Match Score ScoreExact Expect]} { FaireLire "$Nom pas de Score dans $Ligne"; set Skip 1; break }
            # BLAST prints very small expectations as "e-123": give them a mantissa.
            if {[regexp "^e" $Expect]} { set Expect "1.$Expect"}
            # A score written in exponent notation is replaced by a large sentinel.
            if {[regexp "e" $Score]} { set Score 9999 }

            gets $f Ligne
            # Leave at once: looping back would test the Score pattern against
            # this same line and prompt the user a second time.
            if { ! [regexp {Identities = ([0-9]+)/([0-9]+) \(([0-9]+)%\)} $Ligne Match IdentUp IdentDown PId]} { FaireLire "$Nom pas de Identities dans $Ligne"; set Skip 1; break }

            gets $f Ligne
            if {[regexp "Minus" $Ligne]} { set Strand "R" } else { set Strand "F" }

            # One pass per alignment of the hit.
            set FinDeFichier 0
            while { ! $FinDeFichier } {
                set DebQuery ""
                set DebSbjct ""
                set FinQuery ""
                set FinSbjct ""
                # Stays 1 only when the channel is exhausted before a boundary
                # line is met (truncated report): without it the enclosing
                # loop would never end.
                set FinDeFichier 1
                while {[gets $f Ligne]>=0} {
                    if {[regexp {Score } $Ligne] || [regexp {^>} $Ligne] || [regexp "Database" $Ligne]} { set FinDeFichier 0 ; break }
                    if {[regexp {^Query: +([0-9]+)[^0-9]+([0-9]+)$} $Ligne Match DQ FQ]} {
                        if {$DebQuery==""} { set DebQuery $DQ }
                        set FinQuery $FQ
                    }
                    if {[regexp {^Sbjct: +([0-9]+)[^0-9]+([0-9]+)$} $Ligne Match DS FS]} {
                        if {$DebSbjct==""} { set DebSbjct $DS }
                        set FinSbjct $FS
                    }
                }
                # Record the segment only if coordinates were really read.
                if {$DebQuery!="" && $DebSbjct!=""} {
                    set Vu 0
                    foreach DQDV $LesDQDejaVu FQDV $LesFQDejaVu {
                        if {$DQDV <= $DebQuery && $FinQuery <= $FQDV} { set Vu 1; break }
                    }
                    if { ! $Vu } {
                        lappend LesDQDejaVu $DebQuery
                        lappend LesFQDejaVu $FinQuery
                        lappend LesDebQuery $DebQuery
                        lappend LesFinQuery $FinQuery
                        lappend LesDebSbjct $DebSbjct
                        lappend LesFinSbjct $FinSbjct
                    }
                }
                if {[regexp {^>} $Ligne] || [regexp "Database" $Ligne]} { break }
            }
            break
        }
        close $f
        if {$Skip} { continue }
        # Nothing usable was read: do not create an empty result file, so the
        # entry is retried once its report is complete.
        if {[llength $LesDebQuery]==0} { continue }

        # Reorder the subject coordinates along the sorted query coordinates.
        foreach DQ $LesDebQuery DS $LesDebSbjct {
            set DSdeDQ($DQ) $DS
        }
        set LesDebQuery [lsort -integer $LesDebQuery]
        set LesDebSbjct {}
        foreach DQ $LesDebQuery {
            lappend LesDebSbjct $DSdeDQ($DQ)
        }

        foreach FQ $LesFinQuery FS $LesFinSbjct {
            set FSdeFQ($FQ) $FS
        }
        set LesFinQuery [lsort -integer $LesFinQuery]
        set LesFinSbjct {}
        foreach FQ $LesFinQuery {
            lappend LesFinSbjct $FSdeFQ($FQ)
        }

        set LeMessage {}
        foreach DQ $LesDebQuery  FQ $LesFinQuery  DS $LesDebSbjct  FS $LesFinSbjct {
            if {[info exists DejaVu($DQ-$FQ)]} { continue }
            set DejaVu($DQ-$FQ) 1
            set Message [format "%s %5d - %5d %-5s %12d - %12d %s %4d/%4d %3d%% %5d %5d %f" $Nom $DQ $FQ $Chr $DS $FS $Strand $IdentUp $IdentDown $PId $Score $ScoreExact $Expect]
            lappend LeMessage $Message
        }
        SauveLesLignes $LeMessage dans $LocFile
    }
}

proc MyRefMrna {} {
    # For every name of [ListeDesPABs] that has a tblastn report against
    # refMrna, fetch the sequence of the first hit from the blast bank and save
    # it in nuctfaRefMrna under a header built from the protein header:
    #   >Nom Access Number RefMrna of <rest of the prottfa header>
    set RepTbn "tblastn_Homo_sapiens_refMrna_hg19"
    set RepRefMrna "nuctfaRefMrna"
    file mkdir "[RepertoireDuGenome]/$RepRefMrna"
    foreach Nom [ListeDesPABs] {
        set Fichier [GscopeFile $Nom $RepTbn]
        if {[FileAbsent $Fichier]} { continue }
        DecortiqueBlast $Fichier "" "" "" lBanqueId lAccess lScore lPN lPartieSegAli
        Espionne "[lindex $lBanqueId 0] [lindex $lAccess 0] [lindex $lScore 0] [lindex $lPN 0]"
        set Access [lindex $lBanqueId 0]
        if {$Access==""} { continue }
        set TFA [LaSequenceDuTFAs "/blast/Homo_sapiens_refMrna_hg19" $Access]
        # Entete receives the whole first line, newline included.
        if { ! [regexp {^>([^ ]+) ([0-9]+)\n} $TFA Entete A N]} { FaireLire "$TFA"; continue }
        set EnteteProt [PremiereLigneDuFichier [GscopeFile $Nom prottfa]]
        regsub {>[^ ]+ } $EnteteProt ">$Nom $A $N RefMrna of " EnteteRefMrna
        # Swap the header by plain string surgery: used as a regsub substitution
        # spec, a description containing "&" or a backslash would be expanded
        # and the saved header corrupted.
        set TFARefMrna "$EnteteRefMrna\n[string range $TFA [string length $Entete] end]"
        Espionne $EnteteRefMrna
        Sauve $TFARefMrna dans [GscopeFile $Nom $RepRefMrna]
    }
}


proc TestCrmMapper {} {
    # Manual check of the Bird URL interface: fetch the mouse refgene entries
    # of chr11, strand +, whose txStart is 77747137, and show the answer.
    # Earlier trial, kept for reference:
    #    set A [BirdSendQueryUrlAndGetFromUrl {select TXSTART from ucscmouse.refgene where chrom='chr11' and 77696429<=txStart and strand='+' order by TXSTART fetch first 1 rows only} {} format=flat]
    #    Espionne "$A"
    set Requete {select NAME, TXSTART, TXEND, STRAND from ucscmouse.refgene where chrom='chr11' and 77747137=txStart and strand='+' order by TXEND}
    set Reponse [BirdSendQueryUrlAndGetFromUrl $Requete {} format=plat]
    Espionne $Reponse
}

proc TestLocAfter {} {
    # Manual check of LocAfter on mouse chr11, strand +, position 77696429.
    Espionne [LocAfter 77696429 Mouse chr11 +]
}

proc DrawGenesFromZone_AECRIRE {Debut Fin Org Chr {Strand ""} {FromWhere ""}} {
    # Unfinished (A ECRIRE = still to be written): the genes of the zone are
    # fetched and a canvas is created, but nothing is drawn yet.
    Wup "Attention UCSC compte a partir de 0"
    foreach Gene [GenesFromZone $Debut $Fin $Org $Chr $Strand $FromWhere] {
        # drawing of each gene still to be written
    }
    set K [UnCanva]
}

proc LocIn {Position Org Chr {Strand ""} {FromWhere ""}} {
    # Ask Bird for the entries of table ucsc<org>.<fromwhere> (default table
    # refGene) on chromosome Chr whose txStart..txEnd range contains Position,
    # optionally restricted to one strand (F and R are accepted for + and -).
    # Returns the trimmed text answered by Bird.
    Wup "Attention UCSC compte a partir de 0"
    if {$FromWhere==""} { set FromWhere "refGene" }
    set Table "ucsc[string tolower $Org].[string tolower $FromWhere]"

    set Conditions [list "chrom='$Chr'" "txStart<=$Position" "$Position<=txEnd"]
    if {$Strand!=""} {
        switch -exact -- $Strand {
            F { set Strand "+" }
            R { set Strand "-" }
        }
        lappend Conditions "strand='$Strand'"
    }

    set Query "select NAME, TXSTART, TXEND, STRAND from $Table where [join $Conditions { and }] order by TXSTART"
    #   set Retour [Bird $Query "" "flat"]
    return [string trim [BirdSendQueryUrlAndGetFromUrl $Query "" "format=plat"] " \n"]
}

proc LocAfter {Position Org Chr {Strand ""} {FromWhere ""}} {
    # Two Bird queries on table ucsc<org>.<fromwhere> (default table refGene):
    # first the smallest txStart at or after Position on chromosome Chr, then
    # the entries starting exactly there, ordered by TXEND. The strand filter is
    # optional (F and R are accepted for + and -).
    # Returns "" when the first query answers nothing, otherwise the trimmed
    # text of the second answer.
    Wup "Attention UCSC compte a partir de 0"
    if {$FromWhere==""} { set FromWhere "refGene" }
    set Table "ucsc[string tolower $Org].[string tolower $FromWhere]"

    # The same strand clause is appended to both queries.
    set ClauseBrin ""
    if {$Strand!=""} {
        switch -exact -- $Strand {
            F { set Strand "+" }
            R { set Strand "-" }
        }
        set ClauseBrin " and strand='$Strand'"
    }

    set Query "select TXSTART from $Table where chrom='$Chr' and $Position<=txStart${ClauseBrin} order by TXSTART fetch first 1 rows only"
    #   set Premier [Bird $Query "flat"]
    set Premier [BirdSendQueryUrlAndGetFromUrl $Query "" "format=flat"]
    if {$Premier==""} { return "" }

    set Query "select NAME, TXSTART, TXEND, STRAND from $Table where chrom='$Chr' and $Premier=txStart${ClauseBrin} order by TXEND"
    #   set Retour [Bird $Query "flat"]
    return [string trim [BirdSendQueryUrlAndGetFromUrl $Query "" "format=plat"] " \n"]
}

proc LocBefore {Position Org Chr {Strand ""} {FromWhere ""}} {
    # Two Bird queries on table ucsc<org>.<fromwhere> (default table refGene):
    # first the largest txEnd at or before Position on chromosome Chr, then the
    # entries ending exactly there, ordered by TXSTART. The strand filter is
    # optional (F and R are accepted for + and -).
    # Returns "" when the first query answers nothing, otherwise the trimmed
    # text of the second answer.
    Wup "Attention UCSC compte a partir de 0"
    if {$FromWhere==""} { set FromWhere "refGene" }
    set Table "ucsc[string tolower $Org].[string tolower $FromWhere]"

    # The same strand clause is appended to both queries.
    set ClauseBrin ""
    if {$Strand!=""} {
        switch -exact -- $Strand {
            F { set Strand "+" }
            R { set Strand "-" }
        }
        set ClauseBrin " and strand='$Strand'"
    }

    set Query "select TXEND from $Table where chrom='$Chr' and txEnd<=$Position${ClauseBrin} order by TXEND desc fetch first 1 rows only"
    #   set Premier [Bird $Query "flat"]
    set Premier [BirdSendQueryUrlAndGetFromUrl $Query "" "format=flat"]
    if {$Premier==""} { return "" }

    set Query "select NAME, TXSTART, TXEND, STRAND from $Table where chrom='$Chr' and $Premier=txEnd${ClauseBrin} order by TXSTART"
    #   set Retour [Bird $Query "flat"]
    return [string trim [BirdSendQueryUrlAndGetFromUrl $Query "" "format=plat"] " \n"]
}

proc GenesFromZone {Debut Fin Org Chr {Strand ""} {FromWhere ""}} {
    # Query table ucsc<org>.<fromwhere> (default table refGene) through Bird for
    # the entries of chromosome Chr lying entirely between Debut and Fin,
    # optionally on one strand (F and R are accepted for + and -).
    # Returns a list of "TxStart TxEnd Strand Name" lines, one per "resultset"
    # node of the answered document.
    Wup "Attention UCSC compte a partir de 0"
    if {$FromWhere==""} { set FromWhere "refGene" }
    set Table "ucsc[string tolower $Org].[string tolower $FromWhere]"

    set Conditions [list "chrom='$Chr'" "txstart>=$Debut" "txend<=$Fin"]
    if {$Strand!=""} {
        switch -exact -- $Strand {
            F { set Strand "+" }
            R { set Strand "-" }
        }
        lappend Conditions "strand='$Strand'"
    }

    set Query "select NAME, TXSTART, TXEND, STRAND from $Table where [join $Conditions { and }] order by TXSTART"
    set Racine [Bird $Query "documentelement"]
    set LesLignes {}
    foreach Noeud [$Racine selectNodes "resultset"] {
        set Valeurs {}
        foreach Champ {TXSTART TXEND STRAND NAME} {
            lappend Valeurs [$Noeud selectNodes "string($Champ)"]
        }
        lappend LesLignes [join $Valeurs " "]
    }
    return $LesLignes
}

proc GenomeSize {Orga {BigZips ""}} {
    # Measure every "normal" fasta file that BoutADNDeUcsc lists for Orga and
    # BigZips. Returns one "%10d name" line per file (sequence length, file
    # tail) followed by a final line with the sum, labelled Total.
    # Each length is also shown through Espionne.
    set Somme 0
    foreach FastaFile [BoutADNDeUcsc List normal Fasta $Orga - $BigZips] {
        set Longueur [string length [QueLaSequenceDuFichierTFA $FastaFile]]
        Espionne $Longueur
        lappend Rapport [format "%10d %s" $Longueur [file tail $FastaFile]]
        incr Somme $Longueur
    }
    lappend Rapport [format "%10d %s" $Somme Total]
    return $Rapport
}

proc BoutADNDeUcsc {{Deb ""} {Fin ""} {Orient ""} {Orga ""} {Chro ""} {BigZips ""}} {
    # Multi-purpose access to the UCSC genome fasta files.
    #   no argument            : returns the usage text
    #   Deb contains "Dir"     : returns the root directory of the genomes
    #   Deb contains "List"    : listings (organisms, bigZips, links, fasta files),
    #                            selected by keywords searched in the concatenated
    #                            arguments; the order of the tests below matters
    #   Deb is "GetFastaFiles" : fasta files of Orga/BigZips (Fin containing "all"
    #                            keeps the files with "_" in their name)
    #   Deb is "GetFile"       : path of the fasta file of chromosome Chro
    #   otherwise              : result of BoutADNDuTFA for Deb Fin Orient on that file
    # Returns "" when nothing matches or when the fasta file cannot be found.
    JeMeSignale
    Wup "Attention BoutADNdeUCSC compte a partir de 1"
    Wup "UCSC compte a partir de 0 pour les starts, et 1 pour les end"

    set RepUCSCGenomes "/genomics/link/UCSCGenomes"

    # Called without argument: answer the list of supported calls.
    if {$Deb==""} {
	set Usage {}
	lappend Usage "BoutADNDeUcsc Dir"
	lappend Usage "BoutADNDeUcsc List of organisms"
	lappend Usage "BoutADNDeUcsc List of bigZips"
	lappend Usage "BoutADNDeUcsc List of bigZips Homo_sapiens"
	lappend Usage "BoutADNDeUcsc List of links"
	lappend Usage "BoutADNDeUcsc List normal fasta Homo_sapiens - bigZips200903"
	lappend Usage "BoutADNDeUcsc List all fasta Homo_sapiens - bigZips200903"
	lappend Usage "BoutADNDeUcsc 23 2789 R Homo_sapiens chr1 bigZips200903"
	lappend Usage "BoutADNDeUcsc 1 end F Homo_sapiens chr14 bigZips"
	return [join $Usage "\n"]
    }
    if {[regexp -nocase {Dir} "$Deb"]} {
	return $RepUCSCGenomes
    }
    if {[regexp -nocase {List} "$Deb"]} {
	# Inventory of every <organism>/bigZips* entry under the root.
	set LesBigZips [lsort [glob -nocomplain "$RepUCSCGenomes/*/bigZips*"]]
	set LesOrganismes {}
	set LesLiens {}
	foreach BZ $LesBigZips {
	    set Org [file tail [file dirname $BZ]]
	    lappend LesOrganismes $Org
	    lappend BigZipsFrom($Org) [file tail $BZ]
	    # Symbolic links are additionally reported as "<path> links to <target>".
	    if {[file type $BZ]=="link"} {
		set Target [file readlink $BZ]
		set Message "$BZ links to $Target"
		lappend LesLiens $Message
		lappend BigZipsFrom($Org) $Message
		lappend LesBigZips $Message
	    }
	}
	if {[regexp -nocase "Link" "$Deb$Fin$Orient"]} { return $LesLiens }
	set LesOrganismes [lsort -unique $LesOrganismes]
	if {[regexp -nocase {List.*Org} "$Deb$Fin$Orient"]} { return $LesOrganismes }
	# bigZips of one organism: the organism name is searched as a pattern in the arguments.
	foreach Org $LesOrganismes {
	    if {[regexp -nocase $Org "$Deb$Fin$Orient$Orga"] && [regexp -nocase BigZip "$Deb$Fin$Orient"]} {
		return $BigZipsFrom($Org)
	    }
	}
	if {[regexp -nocase {List.*BigZip} "$Deb$Fin$Orient"]} { return $LesBigZips }
	if {[regexp -nocase {List.*Fasta}  "$Deb$Fin$Orient"]} {
	    set AllOrNot "Normal"
	    if {[regexp -nocase "All" "$Deb$Fin$Orient"]} { set AllOrNot "All" }
	    # Delegates to the GetFastaFiles mode handled further down.
	    return [BoutADNDeUcsc "GetFastaFiles" $AllOrNot $Orient $Orga $Chro $BigZips]
	}
	return ""
    }
    
    # Defaults, then translation of common organism names to directory names.
    if {$BigZips==""} { set BigZips "bigZips" }
    set FichierTFA ""
    if {$Orga==""} { set Orga "Homo_sapiens" }
    set OrgaDir $Orga
    if {[regexp -nocase "Human"      $Orga]} { set OrgaDir "Homo_sapiens"}
    if {[regexp -nocase "Mouse"      $Orga]} { set OrgaDir "Mus_musculus"}
    if {[regexp -nocase "Rat"        $Orga]} { set OrgaDir "Rattus_norvegicus"}
    if {[regexp -nocase "Worm"       $Orga]} { set OrgaDir "Caenorhabditis_elegans"}
    if {[regexp -nocase "Danio|Fish" $Orga]} { set OrgaDir "Danio_rerio"}
    # NOTE(review): "Drosophilia" differs from the species name Drosophila - confirm it matches the directory on disk.
    if {[regexp -nocase "Fly|Droso"  $Orga]} { set OrgaDir "Drosophilia_melanogaster"}
    
    if {[regexp -nocase {GetFastaFiles} $Deb]} {
	set LesFasta [lsort -dictionary [glob -nocomplain "$RepUCSCGenomes/$OrgaDir/$BigZips/*.fa"]]
	if {[regexp -nocase "all" $Fin]} { return $LesFasta }
	# "Normal" listing: leave out the files whose name contains an underscore.
	set LesBonFasta {}
	foreach F $LesFasta {
	    if {[regexp "_" [file tail $F]]} { continue }
	    lappend LesBonFasta $F
	}
	return $LesBonFasta
    }
	

    # The fasta file is searched directly under bigZips, then in a
    # subdirectory named after the chromosome without its "chr" prefix.
    set FichierTFA "$RepUCSCGenomes/$OrgaDir/$BigZips/$Chro.fa"
    if {[FileAbsent $FichierTFA]} {
	regsub "chr" $Chro "" Rep
	regsub "/$BigZips/" $FichierTFA "/$BigZips/$Rep/" FichierTFA
    }
    if {[FileAbsent $FichierTFA]} {
	FaireLire "I cannot find $FichierTFA for $Orga $Chro $BigZips"
	return ""
    }
    if {$Deb=="GetFile"} { return $FichierTFA }
    return [BoutADNDuTFA $Deb $Fin $Orient $FichierTFA]
}

proc LocaliseDansLaListe {Liste Pos} {
    # Liste is an increasing list of limits; returns "D F", the two consecutive
    # limits with D <= Pos < F (the last interval also accepts Pos == F).
    # Returns "-1 -1" for a negative Pos and "-2 -2" when Pos lies beyond the
    # last limit or when Liste holds fewer than two limits.
    # A Pos below the first limit is reported in the first interval.
    if {$Pos<0} { return "-1 -1" }
    # No interval can be formed: without this guard the loop below never runs
    # and the final test reads an unset variable.
    if {[llength $Liste]<2} { return "-2 -2" }
    set D [lindex $Liste 0]
    set OldD $D
    foreach F [lrange $Liste 1 end] {
        if {$Pos>=$F} { set OldD $D ; set D $F ; continue }
        return "$D $F"
    }
    # Pos sits exactly on the last limit: it belongs to the last interval.
    if {$Pos==$F} { return "$OldD $F" }
    return "-2 -2"
}

proc CytoBandUcsc {{Qui ""} {Quoi ""} {Quoi2 ""} {Quoi3 ""}} {
    # Cytoband lookups for Human and Mouse, served from the global array
    # CytoBandUcsc which is filled from the two cytoBand.txt files on first use.
    # The supported calls are listed in the Wup lines below.
    # Returns "" for an unknown organism or key.
    Wup "CytoBandUcsc Mouse 4qA3                   returns the start and stop 17555508 27993917"
    Wup "CytoBandUcsc Mouse chr4 17555585          returns the cytoband 4qA3"
    Wup "CytoBandUcsc Mouse chr4          27993918 returns the cytoband 4qA4"
    Wup "CytoBandUcsc Mouse Limits Of 4            return all limits "
    Wup "CytoBandUcsc ListOf Organisms             return available organisms"
    if {! [OnTraiteUCSCGenomes]} {
        return [QuestionDeScience "UCSCGenomes" "ret CytoBandUcsc $Qui $Quoi $Quoi2 $Quoi3"]
    }
    global CytoBandUcsc
    # Fold the optional arguments into one dash-separated key.
    if {$Quoi3!=""} { set Quoi2 "$Quoi2-$Quoi3" ; set Quoi3 "" }
    if {$Quoi2!=""} { set Quoi "$Quoi-$Quoi2" ; set Quoi2 "" }

    if {$Qui==""} {
        set Qui "ListOf"
        set Quoi "Organisms"
    } else {
        if {[regexp -nocase {Homo sapiens|Human} $Qui]} { set Qui "Human" }
        if {[regexp -nocase {Mus musculus|Mouse} $Qui]} { set Qui "Mouse" }
        set O $Qui
    }
    regsub "chr" $Quoi "" Quoi

    # "<chromosome>-<position>" is a localisation request.
    if {[regexp -nocase {^[0-9A-Z]+\-[0-9]+$} $Quoi]} { set Quoi "Loc-$Quoi" }

    if {[info exists CytoBandUcsc($Qui,$Quoi)]} { return $CytoBandUcsc($Qui,$Quoi) }
    if {[regexp -nocase {^(Loc\-?[0-9A-Z]+)\-([0-9]+)$} $Quoi Match Chro Pos]} {
        regsub {Loc\-?} $Chro "" Chro
        if {[lsearch [CytoBandUcsc ListOf Organisms] $O]<0} { return "" }
        # Find the two limits surrounding Pos, then the band stored under them.
        set Bande [LocaliseDansLaListe [CytoBandUcsc $Qui "Limits-Of-$Chro"] $Pos]
        regsub " " $Bande "-" Bande
        return [CytoBandUcsc $O $Chro $Bande]
    } else {
        if {[info exists CytoBandUcsc("EstCharge")]} { return "" }
    }
    set CytoBandUcsc("EstCharge") 1

    set FichierCytoBandHuman "/genomics/link/UCSCGenomes/Homo_sapiens/database/cytoBand.txt"
    set FichierCytoBandMouse "/genomics/link/UCSCGenomes/Mus_musculus/database/cytoBand.txt"

    # Both files are read in one pass, each preceded by a line naming its organism.
    set LesLignes {}
    lappend LesLignes "Homo sapiens"
    LConcat LesLignes [LesLignesDuFichier $FichierCytoBandHuman]
    lappend LesLignes "Mus musculus"
    LConcat LesLignes [LesLignesDuFichier $FichierCytoBandMouse]

    # RajouterF and DernierD hold the end and the start of the last band read.
    # When a chromosome is finished its end becomes the closing limit, and its
    # last band is stored with its true end (the other bands stop at end-1).
    set OldChro ""
    foreach Ligne $LesLignes {
        if { [regexp "^chrom" $Ligne]} { continue }
        if { ! [regexp "^chr" $Ligne]} {
            if {[info exists RajouterF]} {
                lappend CytoBandUcsc($O,Limits-Of-$NChro) $RajouterF
                set CytoBandUcsc($O,$NChroCyto) "$DernierD $RajouterF"
                # Already flushed: must not be flushed again under the next organism.
                unset RajouterF
            }
            set OldChro ""
            if {$Ligne=="Homo sapiens"} {
                set O "Human"
                lappend CytoBandUcsc(ListOf,Organisms) $O
            }
            if {$Ligne=="Mus musculus"} {
                set O "Mouse"
                lappend CytoBandUcsc(ListOf,Organisms) $O
            }
            continue
        }
        scan $Ligne "%s %s %s %s %s" Chro D F Cyto G
        if {$OldChro!="" && $Chro!=$OldChro && [info exists RajouterF]} {
            # D and F now belong to the first band of the new chromosome: the
            # previous chromosome is closed with the remembered values.
            lappend CytoBandUcsc($O,Limits-Of-$NChro) $RajouterF
            set CytoBandUcsc($O,$NChroCyto) "$DernierD $RajouterF"
        }
        set OldChro $Chro
        regsub "chr" $Chro "" NChro
        set NChroCyto "$NChro$Cyto"
        set CytoBandUcsc($O,$NChroCyto) "$D [expr {$F-1}]"
        set CytoBandUcsc($O,$NChro-$D-$F) $NChroCyto
        lappend CytoBandUcsc($O,Limits-Of-$NChro) $D
        lappend CytoBandUcsc($O,List-Of-CytoBands) $NChroCyto
        set RajouterF $F
        set DernierD $D
    }
    if {[info exists RajouterF]} {
        lappend CytoBandUcsc($O,Limits-Of-$NChro) $RajouterF
        set CytoBandUcsc($O,$NChroCyto) "$DernierD $RajouterF"
    }
    return [CytoBandUcsc $Qui $Quoi]
}

proc LocInBetween {Position Org Chr {FromWhere ""} {FinPosition ""}} {
    # Locate Position (or the range Position..FinPosition) among the
    # "Start End ..." entries known for Org and Chr.
    # Returns one of:
    #   "Error NoStartEndNameAvailable" or "Error NoGenome" when no entry is available
    #   "Outside <first entry> <last entry>" when Position is out of the covered span
    #   a list of "In <entry>" for the entries overlapping the range
    #   "Between <entry before> <entry after>" otherwise
    # The entry lists are cached in the global array LocInBetween.
    #
    # When LocUcsc is used, Org=Mouse
    # When LocUcscRefSeq is used, Org=mm9
    # NB: for Between, when several NM overlap, only one of them shows up in the result (arbitrarily). Later: consider all the NM, or the longest one ?
    Wup "Attention UCSC compte a partir de 0"
    global LocInBetween

    # Number of neighbours examined on each side of the entry reached by the search.
    set NDeChaqueCote 20

    if {$FinPosition==""} { set FinPosition $Position }
    if {$FromWhere==""} { set FromWhere "Ucsc" }

    # Cache key: one entry list per source, organism and chromosome.
    set Foc "$FromWhere$Org$Chr"

    if { ! [info exists LocInBetween($Foc)]} {
	if {$FromWhere=="Ucsc"} {
	    #set LocInBetween($Foc) [LocUcsc ListOf$Org$Chr "StartEndName"] 
	    set LocInBetween($Foc) [LocUcscRefSeq $Org ListOf$Chr "StartEndName"]
	}
	if {$FromWhere=="Affy"} {
	    set LocInBetween($Foc) [LocAffy ListOf$Org$Chr "StartEndName"] 
	}
    }
    # NOTE(review): any other FromWhere leaves the cache entry unset and the next line fails - confirm callers only pass Ucsc or Affy.
    set D 0
    set F [expr [llength $LocInBetween($Foc)] - 1]
    if {$F<$D} { return "Error NoStartEndNameAvailable" }
    set SENDeb [lindex $LocInBetween($Foc) 0]
    set SENFin [lindex $LocInBetween($Foc) end]
    if {$SENDeb==""||$SENFin==""} { return "Error NoGenome" } 
    if {$Position<[lindex $SENDeb 0] || $Position>[lindex $SENFin 1]} {
	return "Outside $SENDeb $SENFin"
    }

    # Dichotomy over the entry list between indices D and F
    # (presumably sorted by start - verify against the provider).
    set OldAvant $SENDeb
    set OldApres $SENFin
    while 1 {
	#Espionne "$OldAvant $OldApres"
	#Espionne "$D $F"
	if {$F<$D} { 
	    #Espionne "$F $D"
	    set StatusPremier "Between $OldAvant $OldApres"
	    break
	} 
	set M [expr ($D+$F)/2]
	set SEN [lindex $LocInBetween($Foc) $M]
	#Espionne "$M $SEN"
	set Start [lindex $SEN 0]
	set End   [lindex $SEN 1]
	if {$Start<=$Position && $Position<=$End} {
	    # Position falls in the middle entry: also collect the neighbours
	    # that overlap Position..FinPosition.
	    set LesIns [list "In $SEN"]
	    #Espionne "$M $SEN"
	    for {set I [expr -$NDeChaqueCote]} { $I<=$NDeChaqueCote} { incr I } {
		set MM [expr $M + $I]
		set SEN [lindex $LocInBetween($Foc) $MM]
		if {$SEN==""} { continue }
		set Start [lindex $SEN 0]
		set End   [lindex $SEN 1]
		if {$FinPosition<$Start || $End<$Position} { continue }
		#Espionne "$MM $SEN"
		lappend LesIns "In $SEN"
	    }
	    set LesIns [lsort -unique $LesIns]
	    return $LesIns
	}
	if {$D==$F} {
	    # Interval reduced to one entry that does not contain Position.
	    if {$Position<$Start} {set OldApres $SEN}
	    if {$End<$Position} {set OldAvant $SEN}
	    set StatusPremier "Between $OldAvant $OldApres"
	    break
	}
	if {$Position<$Start} {set OldApres $SEN; set F [expr $M -1]}
	if {$End<$Position}   {set OldAvant $SEN; set D [expr $M +1]}
    }
    # The loop above ended on Between: check whether some neighbouring entries still overlap the range
    set LesIns {}
    for {set I [expr -$NDeChaqueCote]} { $I<=$NDeChaqueCote} { incr I } {
	set MM [expr $M + $I]
	set SEN [lindex $LocInBetween($Foc) $MM]
	if {$SEN==""} { continue }
	set Start [lindex $SEN 0]
	set End   [lindex $SEN 1]
	if {$FinPosition<$Start || $End<$Position} { continue }
	#Espionne "$MM $SEN"
	lappend LesIns "In $SEN"
    }
    set LesIns [lsort -unique $LesIns]
    if {$LesIns!={}} { return $LesIns }
    
    return $StatusPremier
}

proc LocUcscEssai {{Qui ""} {Quoi ""}} {
    # Trial version of the UCSC localisation cache: only the header is
    # available for now, the knownGene file named below is not read.
    #   LocUcscEssai             returns the list of column names
    #   LocUcscEssai Qui Quoi    returns the cached value, "" when unknown
    global LocUcscEssai
    # The cache has to be global: the call ends by calling itself to serve the
    # request from the freshly filled cache, and with a local array that inner
    # call would start from scratch again, without end.
    Wup "Attention UCSC compte a partir de 0"

    set RepertoireUcsc "/genomics/link/UCSCGenomes"
    set FichierLocUcscMouse "$RepertoireUcsc/Mus_musculus/database/knownGene.txt"

    if {$Qui==""} { set Qui "ListOf" ; set Quoi "Header" }
    if {$Quoi==""} { set Quoi "line" }

    if {[info exists LocUcscEssai($Qui,$Quoi)]} { return $LocUcscEssai($Qui,$Quoi) }
    if {[info exists LocUcscEssai("EstCharge")]} { return "" }
    set LocUcscEssai("EstCharge") 1

    set LesTetes [list KgId chrom strand txStart txEnd cdsStart cdsEnd exonCount exonStarts exonEnds proteinAcc alignID]
    set LocUcscEssai(ListOf,Header) $LesTetes
    return [LocUcscEssai $Qui $Quoi]
}

proc LocUcsc {{Qui ""} {Quoi ""} {Organism ""}} {
    # Lookups in a refGene.txt table, cached in the global array LocUcsc.
    #   LocUcsc                        returns the list of column names
    #   LocUcsc Id Column              returns the value of Column for the entry Id
    #   LocUcsc Id                     returns the whole line of the entry
    #   LocUcsc ListOf KgId|proteinAcc|chrom
    #   LocUcsc ListOf<chrom> StartEndName
    # When Qui is no entry identifier but has entries recorded under
    # (Qui,KgId), as protein accessions do, the request is redirected to them.
    # Organism starting with H or M (any case) selects the Human or the Mouse
    # file; changing the organism empties the cache. Human is the default.
    # Returns "" for anything unknown.
    global LocUcsc
    Wup "Attention UCSC compte a partir de 0 pour les start MAIS a partir de 1 pour les end !!!!!!!!!!!!!!!!!!!!!!!!"
    Wup "ATTENTION depuis 20200115 on proend Homom_sapiens par defaut"
    Wup "Pour la souris faire d'abord LocUcsc Mus_musculus"

    set Racine "/genomics/link/UCSCGenomes"
    set FichierHumain "$Racine/Homo_sapiens/database/hg19/last/refGene.txt"
    # set FichierSouris "$Racine/Mus_musculus/database/mm9/last/knownGene.txt"
    set FichierSouris "$Racine/Mus_musculus/database/mm9/last/refGene.txt"

    #rR The default organism can be changed here
    foreach {Motif FichierChoisi} [list "^H" $FichierHumain "^M" $FichierSouris] {
        if { ! [regexp -nocase $Motif $Organism]} { continue }
        if {[info exists LocUcsc(organism,file)] && $LocUcsc(organism,file)!=$FichierChoisi} {
            unset LocUcsc
        }
        set LocUcsc(organism,file) $FichierChoisi
    }
    if { ! [info exists LocUcsc(organism,file)]} { set LocUcsc(organism,file) $FichierHumain }

    if {$Qui==""} { set Qui "ListOf" ; set Quoi "Header" }
    if {$Quoi==""} { set Quoi "line" }

    if {[info exists LocUcsc($Qui,$Quoi)]} { return $LocUcsc($Qui,$Quoi) }
    if {[info exists LocUcsc("EstCharge")]} {
        # Already loaded: the only remaining chance is the redirection through KgId.
        if { ! [info exists LocUcsc($Qui,KgId)]} { return "" }
        set KgId $LocUcsc($Qui,KgId)
        if { ! [info exists LocUcsc($KgId,$Quoi)]} { return "" }
        return $LocUcsc($KgId,$Quoi)
    }
    set LocUcsc("EstCharge") 1

    set Colonnes [list KgId NM chrom strand txStart txEnd cdsStart cdsEnd exonCount exonStarts exonEnds Zero proteinAcc alignID]
    set LocUcsc(ListOf,Header) $Colonnes
    foreach Enregistrement [LesLignesDuFichier $LocUcsc(organism,file)] {
        #rR a bit clumsy: the " " argument tells that two blanks must not be reduced to one
        set Champs [LesMotsDeLaLigne $Enregistrement " "]
        set KgId [lindex $Champs 0]
        lappend LocUcsc(ListOf,KgId) $KgId
        # Every column also becomes a local variable named after it, used below.
        foreach Tete $Colonnes Mot $Champs {
            set $Tete $Mot
            set LocUcsc($KgId,$Tete) $Mot
        }

        lappend LocUcsc(ListOf,proteinAcc) $proteinAcc
        lappend LocUcsc($proteinAcc,KgId) $KgId

        set LocUcsc($KgId,line) $Enregistrement
        lappend LocUcsc(ListOf,chrom) $chrom

        #LocUcsc ListOfchr1 StartEndName
        lappend LocUcsc(ListOf$chrom,StartEndName) [list $txStart $txEnd $strand $KgId]
    }
    set LocUcsc(ListOf,chrom) [lsort -dictionary -unique $LocUcsc(ListOf,chrom)]
    return [LocUcsc $Qui $Quoi]
}

proc LocUcscOld {{Qui ""} {Quoi ""}} {
    # Former loader of the UCSC localisations, Human and Mouse together, cached
    # in the global array LocUcsc (the same array the proc LocUcsc uses).
    #   LocUcscOld                  returns the list of column names
    #   LocUcscOld Name Column      returns the value of Column for the entry Name
    #   LocUcscOld Name             returns the whole line of the entry
    #   LocUcscOld ListOf<Org> Access|chrom,  ListOf<Org><chrom> StartEndName, ...
    # The files default to knownGene.txt; LocUcsc(FichierLocUcscHuman) and
    # LocUcsc(FichierLocUcscMouse) may be set beforehand to use other files.
    # A Mouse file named refGene* switches both files to the refGene columns.
    # Returns "" for anything unknown.
    Wup "Attention UCSC compte a partir de 0"

    global LocUcsc

    #set WithoutQDS [expr [info exists LocUcsc(FichierLocUcscHuman)] || [info exists LocUcsc(FichierLocUcscMouse)]]
    #if { ! [OnTraiteUCSCGenomes] && ! $WithoutQDS} { return [QuestionDeScience "UCSCGenomes" "ret LocUcsc $Qui $Quoi"] }

    set RepertoireUcsc "/genomics/link/UCSCGenomes"

    if {$Qui==""} { set Qui "ListOf" ; set Quoi "Header" }
    if {$Quoi==""} { set Quoi "line" }

    if {[info exists LocUcsc($Qui,$Quoi)]} { return $LocUcsc($Qui,$Quoi) }
    if {[info exists LocUcsc("EstCharge")]} { return "" }
    set LocUcsc("EstCharge") 1

    if {[info exists LocUcsc(FichierLocUcscHuman)]} {
        set FichierLocUcscHuman $LocUcsc(FichierLocUcscHuman)
    } else {
        set FichierLocUcscHuman "$RepertoireUcsc/Homo_sapiens/database/knownGene.txt"
    }
    if {[info exists LocUcsc(FichierLocUcscMouse)]} {
        set FichierLocUcscMouse $LocUcsc(FichierLocUcscMouse)
    } else {
        set FichierLocUcscMouse "$RepertoireUcsc/Mus_musculus/database/knownGene.txt"
    }

    set OnTraiteRefGene [regexp "refGene" [file tail $FichierLocUcscMouse]]
    if {$OnTraiteRefGene} {
        set EnteteCommuneRefGene "bin name chrom strand txStart txEnd cdsStart cdsEnd exonCount exonStarts exonEnds id name2 cdsStartStat cdsEndStat exonFrames"
        set LesTetes [split $EnteteCommuneRefGene " "]
    } else {
        set EnteteCommuneKnownGene   "KgId chrom strand txStart txEnd cdsStart cdsEnd exonCount exonStarts exonEnds proteinAcc alignID"
        set LesTetes [split $EnteteCommuneKnownGene " "]
    }
    set LocUcsc(ListOf,Header) $LesTetes

    # Both files are read in one pass, each preceded by a marker naming its organism.
    foreach Ligne [concat \
            [list "Human"] [LesLignesDuFichier $FichierLocUcscHuman] \
            [list "Mouse"] [LesLignesDuFichier $FichierLocUcscMouse]]  {
        if {[regexp {^(Human|Mouse)$} $Ligne]} { set HumMou $Ligne ; continue }
        set LesMots [LesMotsDeLaLigne $Ligne]
        if {$OnTraiteRefGene} {
            set Name [lindex $LesMots 1]
        } else {
            set Name [lindex $LesMots 0]
        }
        # A name met again is stored as Name-2, Name-3, ...
        # NOTE(review): this test relies on a column called "name", which only
        # the refGene header has - confirm what is wanted for knownGene.
        if {[info exists LocUcsc($Name,name)]} {
            set ReferenceName $Name
            set Ieme 1
            set NewName "$Name-[incr Ieme]"
            while {[info exists LocUcsc($NewName,name)]} { set NewName "$Name-[incr Ieme]" }
            set LocUcsc($NewName,ReferenceName) $ReferenceName
            set Name $NewName
            lappend LocUcsc(PolyLocalisationOf,$ReferenceName) $NewName
            lappend LocUcsc(ListOf,PolyLocalisation) $NewName
        }
        lappend LocUcsc(ListOf$HumMou,Access) $Name
        lappend LocUcsc(ListOf,Access) $Name
        # Every column also becomes a local variable named after it, used below.
        foreach Tete $LesTetes Mot $LesMots {
            set $Tete $Mot
            set LocUcsc($Name,$Tete) $Mot
        }
        if {$OnTraiteRefGene} {
            lappend LocUcsc(ListOf,name2) $name2
            lappend LocUcsc($name2,line)  $Name
        } else {
            # The knownGene header above calls this column proteinAcc, so that
            # is the variable the loop has set; the cache keys keep their
            # historical proteinID spelling.
            lappend LocUcsc(ListOf,proteinID) $proteinAcc
            lappend LocUcsc($proteinAcc,line) $Name
        }
        set LocUcsc($Name,line) $Ligne
        lappend LocUcsc(ListOf$HumMou,chrom) $chrom
        set StartEndName [list $txStart $txEnd $strand $Name]
        lappend LocUcsc(ListOf$HumMou$chrom,StartEndName) $StartEndName
    }

    foreach Org [list "Human" "Mouse"] {
        # An organism whose file gave no line has nothing to sort.
        if { ! [info exists LocUcsc(ListOf$Org,chrom)]} { continue }
        set LocUcsc(ListOf$Org,chrom) [lsort -unique $LocUcsc(ListOf$Org,chrom)]
        foreach Chr $LocUcsc(ListOf$Org,chrom) {
            set LocUcsc(ListOf$Org$Chr,StartEndName) [lsort -integer -index 0 $LocUcsc(ListOf$Org$Chr,StartEndName)]
        }
    }
    # Serve the request from the cache just filled, through this same proc.
    return [LocUcscOld $Qui $Quoi]
}

proc NotationUCSC {Qui {Quoi ""}} {
    # Translate between the UCSC chromosome names (chr1, chr1_random,
    # chr6_hla_hap1, ...) and the IGBMC accessions (UCSC:HS01, UCSC:HS01rand,
    # UCSC:HS06hap1, ...).
    #   Qui  : a name in either notation; a leading ">" and anything after the
    #          first word are ignored, the case does not matter; or ListOf
    #   Quoi : UCSC, IGBMC or Header. By default a chr... name is translated
    #          to IGBMC and anything else to UCSC.
    # Returns "" for an unknown name.

    scan $Qui "%s" Qui
    regsub ">" $Qui "" Qui
    set QUI [string toupper $Qui]

    if {$Quoi==""} {
        if {[regexp "^CHR" $QUI]} {
            set Quoi "IGBMC"
        } else {
            set Quoi "UCSC"
        }
    }
    global NotationUCSC
    if {[info exists NotationUCSC($QUI,$Quoi)]} {return $NotationUCSC($QUI,$Quoi)}
    if {[info exists NotationUCSC("EstCharge")]} {return ""}

    set NotationUCSC("EstCharge") 1

    set L { chr1 chr1_random chr2 chr2_random chr3 chr3_random chr4 chr4_random
            chr5 chr5_random chr6 chr6_random chr7 chr7_random chr8 chr8_random
            chr9 chr9_random chr10 chr10_random chr11 chr12 chr12_random
            chr13 chr13_random chr14 chr15 chr15_random chr16 chr16_random
            chr17 chr17_random chr18 chr18_random chr19 chr19_random chr20
            chr21 chr22 chr22_random
            chrX chrX_random chrY chrM chr6_hla_hap1 chr6_hla_hap2 }
    foreach Chr $L {
        set CHR [string toupper $Chr]
        # Build the accession: chr -> HS, short suffixes, two-digit number.
        set K $Chr
        regsub "chr" $K "HS" K
        regsub "_random" $K "rand" K
        regsub "_hla_hap1" $K "hap1" K
        regsub "_hla_hap2" $K "hap2" K
        if {[regexp {HS([0-9]+)} $K Match n]} {
            set NN [format "%2.2d" $n]
            regsub "$n" $K $NN K
        }
        set Access "UCSC:$K"
        set Header ">$Access Homo sapiens complete genome $Access Build_35_hg17_May_2004"
        # Lookups are made with the uppercased name, so every entry must also
        # exist under the uppercased accession: "UCSC:HS01rand" is asked as
        # "UCSC:HS01RAND". The mixed-case key is kept for direct array readers.
        foreach Cle [list $CHR $Access [string toupper $Access]] {
            set NotationUCSC($Cle,UCSC)   $Chr
            set NotationUCSC($Cle,IGBMC)  $Access
            set NotationUCSC($Cle,Header) $Header
        }
        lappend NotationUCSC(LISTOF,UCSC) $Chr
        lappend NotationUCSC(LISTOF,IGBMC) $Access
        lappend NotationUCSC(LISTOF,Header) $Header
    }
    return [NotationUCSC $Qui $Quoi]
}

proc HumanGenomeUcsc {} {
    # One-shot conversion: copy HumanUCSC_Build37_hg17_May2004_origine to
    # HumanUCSC_Build37_hg17_May2004 (both in the current directory), replacing
    # each fasta header by the header NotationUCSC knows for its first word.
    # The program exits when the copy is done.
    set I [open "HumanUCSC_Build37_hg17_May2004_origine" "r"]
    set O [open "HumanUCSC_Build37_hg17_May2004" "w"]
    while {[gets $I Ligne]>=0} {
        if {[regexp {>([^ ]+)} $Ligne Match X]} {
            # Ask the proc: NotationUCSC(...) written as a bare word is only a
            # piece of text, which would be copied into the output as is.
            set Header [NotationUCSC $X Header]
            # A header NotationUCSC does not know is kept unchanged.
            if {$Header!=""} { set Ligne $Header }
        }
        puts $O $Ligne
    }
    close $I
    close $O
    exit
}






Index by: file name | procedure name | procedure call | annotation
File generated 2022-04-05 at 12:55.