Index by: file name | procedure name | procedure call | annotation
gscope_yeast.tcl (annotations | original source)

#rR gscope_yeast.tcl

#rR Here you'll find 2 kinds of projects
#rR 1/ the Joy project with 9 yeast genomes (Johan's Yeasts)
#rR 2/ the YeastGenome project with 92 strains + S288C

#rR The Joy project
#rR We have 9 .gbff genbank files containing the chromosomes of 9 yeast genomes. 

proc JoyCreateTfasDesCopains {} {
    # For each name returned by ListeDesPABs, reads the file given by
    # GscopeFile <Nom> blastpJoy and collects the protein TFA of the first hit
    # of every two-letter Joy species code found among the hits.
    # When at least two sequences were collected they are saved through
    # SauveLesLignes into GscopeFile <Nom> tfasdescopains.
    # Returns the list of values returned by SauveLesLignes.
    file mkdir "[RepertoireDuGenome]/tfasdescopains"
    set Created {}
    foreach Nom [ListeDesPABs] {
        Espionne $Nom
        set BlastFile [GscopeFile $Nom "blastpJoy"]
        if {[FileAbsent $BlastFile]} { continue }
        # 1.0e-10 is presumably the expect cutoff - confirm against DecortiqueBlast.
        set HitCount [DecortiqueBlast $BlastFile 1.0e-10  "" Query LesHits "" "" "" LesExpects]
        if {$HitCount==0} { continue }
        # Forget the species codes seen for the previous name.
        unset -nocomplain DejaVu
        set Friends {}
        foreach Hit $LesHits Expect $LesExpects {
            # Hit names look like JoyXxCDSnnnn ; Gs is the two-letter code Xx.
            if {[regexp {(Joy([A-Z][a-z])CDS)([0-9]+)$} $Hit Match JoyFriend Gs I] && ![info exists DejaVu($Gs)]} {
                set DejaVu($Gs) 1
                Espionne =========== $Hit $Gs $I $JoyFriend $Expect
                lappend Friends [ContenuDuFichier "[JoyDir]/$JoyFriend/prottfa/$Hit"]
            }
        }
        # Nothing is written for fewer than two sequences.
        if {[llength $Friends]>=2} {
            lappend Created [SauveLesLignes $Friends dans [GscopeFile $Nom "tfasdescopains"]]
        }
    }
    return $Created
}

proc JoyVersusOi {} {
    # Debug helper: for every taxid known to JoyCode, prints the organism name
    # given by JoyCode next to the one given by OiCode, then exits the interpreter.
    foreach Ox [JoyCode ListOf Ox] {
        Espionne $Ox [JoyCode $Ox OS] = [OiCode $Ox OS] =
    }
    exit
}

proc DeployJoy {} {
    # Entry point for building the Joy projects: runs JoyCreateGenbankFiles and
    # then CreateJoyGscopeProjectsPourTous (whose result is the return value).
    #rR Johan's Yeasts
    #rR We start with the list of .gbff files in /genomics/link/Joy/DuNCBI
    #rR JoyCode provides the lists of Files, Os (the full organism name), Gs (first letters of genus and species), Ox (taxid)
    #rR   and returns one piece of info from another: i.e. JoyCode $Os Ox returns the taxid, etc.
    #rR JoyCreateGenbankFiles creates directories and stores the GenBank files in Joy/JoyGsChr/genbankfiles for Gs in Dh Kc ...
    #rR JoyGenome allows to query all these data (JoyGenome ListOf Strain , JoyGenome JoyScChr ListOfChr, etc.)
    #rR CreateJoyGscopeProjectsPourTous runs 
    #rR   CreateJoyChrGscopeProject
    #rR   CreateJoyCDSGscopeProject
    #rR So simply run :
    JoyCreateGenbankFiles
    CreateJoyGscopeProjectsPourTous
    #rR And you get the 2x9 gscope projects JoyDhChr, JoyDhCDS, ... JoyYlChr, JoyYlCDS
    #rR   i.e. the JoyScChr project contains the chromosomes JoyScChr01 ... JoyScChr17 
    #rR      the JoyScCDS project contains the mRNA and proteins JoyScCDS0001 ... JoyScCDS6008
    #rR                                     and also the tRNA, rRNA and ncRNA

    #rR Once all projects are created we need to calculate the alignments.
    #rR 1/ Create all blastpdatabase foreach Gs JoyGsCDS/banques/AllProttfa
    #rR 2/ Create the .pal merging all these blastdatabases in JoyScCDS/banques/
    #rR 3/ Run the 6008 blast for all Sc proteins
    #rR 4/ Create the 6008 tfasdescopains/
    #rR 5/ Run the 6008 DbClustal 
    #rR 6/ Create the nucleotide alignments using the protein alignments.
    #rR
    #rR Now you can run the CirCode Statistics as with MAMA : Joy00, Joy01, etc 
    
}

proc JoyCreateAllProttfaPourTous {} {
    # Calls JoyCreateAllProttfa on every project listed by JoyGenome ListOf JoyCDS
    # and returns the list of the results, in the same order.
    set Banks {}
    foreach Project [JoyGenome ListOf JoyCDS] {
        set Bank [JoyCreateAllProttfa $Project]
        lappend Banks $Bank
    }
    return $Banks
}

proc JoyCreateAllProttfa {Science} {
    # Asks the project named Science, through QuestionDeScience, to run
    # "CreeUneBanqueBlast AllProttfa" and returns the answer.
    set Answer [QuestionDeScience $Science "ret CreeUneBanqueBlast AllProttfa"]
    return $Answer
}

proc JoyCreateAllJoyAllProttfa {} {
    # Writes banques/AllJoyAllProttfa.pal in the current genome directory:
    # an alias file whose DBLIST names the AllProttfa bank of every Joy CDS project.
    # Returns what SauveLesLignes returns.
    set Banks {}
    foreach JoyCDS [JoyGenome ListOf JoyCDS] {
        lappend Banks "[JoyDir]/$JoyCDS/banques/AllProttfa"
    }
    set Pal [list \
        "# Alias file created by JoyCreateAllJoyPal" \
        "#" \
        "TITLE AllJoyAllProttfa" \
        "#" \
        "DBLIST [join $Banks { }]" \
        "#"]
    set Target "[RepertoireDuGenome]/banques/AllJoyAllProttfa.pal"
    return [SauveLesLignes $Pal dans $Target]
}

proc JoyDir {} {
    # Root directory of the Joy projects.
    set Dir "/genomics/link/Joy"
    return $Dir
}

proc CreateJoyGscopeProjectsPourTous {{Ask ""}} {
    # Creates the Chr and CDS Gscope projects of every Joy strain.
    # Ask : pass "Ask" (any case) to be asked, for each strain whose
    #       <Strain>CDS/beton/adn already exists, whether to keep it ;
    #       otherwise existing projects are silently kept.
    # Returns the list of the results of CreateJoyGscopeProjects for the
    # strains actually rebuilt (empty list when nothing was rebuilt).
    set Ask [string equal -nocase $Ask "Ask"]
    # Must exist even when every strain is skipped.
    set LesFaits {}
    foreach Strain [JoyGenome ListOf Strain] {
        set FichierAdn "[JoyDir]/${Strain}CDS/beton/adn"
        if {[file exists $FichierAdn] && (! $Ask || [OuiOuNon "$FichierAdn already exists. Do I keep it" 0])} { continue }
        lappend LesFaits [CreateJoyGscopeProjects $Strain]
    }
    return $LesFaits
}

proc CreateJoyGscopeProjects {Strain} {
    # Builds the <Strain>Chr project, then the <Strain>CDS project, prints how
    # to open them and returns the name of the CDS project.
    set ChrProject "${Strain}Chr"
    set CdsProject "${Strain}CDS"
    CreateJoyChrGscopeProject $Strain
    CreateJoyCDSGscopeProject $Strain
    Espionne "$ChrProject et $CdsProject ont été créés. Pour les voir :"
    foreach Project [list $ChrProject $CdsProject] {
        Espionne "setgscoperr $Project ; gscope"
    }
    return $CdsProject
}

proc CreateJoyCDSGscopeProject {Strain} {
    # Joy flavour of CreateYeastCDSGscopeProject: same work, with the project
    # placed under the Joy directory.
    set Result [CreateYeastCDSGscopeProject $Strain "Joy"]
    return $Result
}

proc CreateJoyChrGscopeProject Strain {
    # Builds the <Strain>Chr Gscope project under [JoyDir].
    # For every chromosome listed by JoyGenome <Strain>Chr ListOfChr it reads
    # genbankfiles/<Strain><Chr> and writes:
    #   cds/<Nom>    the CDS features, each normalised by NiceCDSFromCDSLines
    #   rna/<Nom>    the rRNA/tRNA/ncRNA features, normalised by NiceRNAFromRNALines
    #   nuctfa/<Nom> the nucleotide sequence found after ORIGIN
    #   infos/<Nom>  Nom, Locus, Definition, OS, OX and OC
    # It then calls CreeBornesDesPABsTroisGradins and writes beton/miniconfig.
    # Returns the text of the miniconfig.
    
    set StrainChr "${Strain}Chr"

    set RG "[JoyDir]/$StrainChr"
    if { ! [file exists $RG]} { file mkdir $RG } 
    # Make the project also reachable as /genomics/link/<Strain>Chr.
    set Link "/genomics/link/$StrainChr"
    if { ! [file exist $Link]} { exec ln -sf $RG /genomics/link/$StrainChr }

    set Prefixe $StrainChr

    # All paths below are relative; presumably NousAllonsAuBoulot makes RG the
    # current directory until OnRevientDuBoulot - confirm.
    NousAllonsAuBoulot $RG
    file mkdir beton
    file mkdir fiches
    file mkdir infos
    file mkdir genbankfiles
    file mkdir nuctfa
    file mkdir nucembl
    file mkdir cds
    file mkdir rna
    set N 0
    foreach Chr [JoyGenome $StrainChr ListOfChr] {
	incr N
	set Nom $Strain$Chr
	set FicGb "genbankfiles/$Nom"
	Espionne "$Nom $FicGb"
	set FicTfa   nuctfa/$Nom
	set FicInfos infos/$Nom
	set LesLignesGb [LesLignesDuFichier $FicGb]
	if { 1 } {
	    # Defaults kept when the corresponding field is not found in the file.
	    set Locus      "LocusUnknown"
	    set Definition "DefinitionUnknown"
	    set OS         "OSUnknown"
	    set OX         "OXUnknown"
	    set OnAttendOrigin 1
	    set Sequence ""
	    set OnEstDansCDS 0
	    set OnEstDansRNA 0
	    set LesCDSs {}
	    set LesRNAs {}
	    set CEstFini 0
	    foreach Ligne $LesLignesGb {
		# Only the first record of the file is read.
		if {[regexp "^//" $Ligne]} { break }
		# Header fields: each regexp overwrites its variable on every matching line.
		regexp {^LOCUS +([^ ]+)( |$)}   $Ligne Match Locus
		regexp {^DEFINITION +(.+)$}     $Ligne Match Definition
		regexp {/organism=\"([^\"]+)\"} $Ligne Match OS
		regexp {taxon\:([^\"]+)\"}      $Ligne Match OX

		# A CDS feature line opens a CDS block.
		if { ! $OnEstDansCDS && [regexp {^     CDS             } $Ligne]} {
		    set OnEstDansCDS 1
		    set LeCDS {}
		    lappend LeCDS $Ligne
		    continue
		}
		# An rRNA, tRNA or ncRNA feature line opens an RNA block.
		if { ! $OnEstDansRNA && [regexp {^     (r|t|nc)RNA   } $Ligne]} {
		    set OnEstDansRNA 1
		    set LeRNA {}
		    lappend LeRNA $Ligne
		    continue
		}
		# Inside a block: it ends at ORIGIN or at the next feature key
		# (five spaces followed by a letter).
		# NOTE(review): the line that ends a block is consumed by the continue,
		# so a CDS line directly following a CDS block is not opened; and a CDS
		# line met while an RNA block is open leaves that RNA block open.
		# Confirm that the input files always have another feature in between.
		if {$OnEstDansCDS} {
		    if {$OnAttendOrigin && [regexp "^ORIGIN" $Ligne]} { set OnAttendOrigin 0 ; set CEstFini 1 }
		    if {$CEstFini || [regexp -nocase {^     [a-z]} $Ligne]} {
			set CDS [NiceCDSFromCDSLines $LeCDS]
			#Espionne $CDS
			lappend LesCDSs $CDS
			set OnEstDansCDS 0
			continue
		    }
		    lappend LeCDS $Ligne
		    continue
		}
		if {$OnEstDansRNA} {
		    if {$OnAttendOrigin && [regexp "^ORIGIN" $Ligne]} { set OnAttendOrigin 0 ; set CEstFini 1 }
		    if {$CEstFini || [regexp -nocase {^     [a-z]} $Ligne]} {
			set RNA [NiceRNAFromRNALines $LeRNA]
			#Espionne $RNA
			lappend LesRNAs $RNA
			set OnEstDansRNA 0
			continue
		    }
		    lappend LeRNA $Ligne
		    continue
		}
		if {$OnAttendOrigin && [regexp "^ORIGIN" $Ligne]} { set OnAttendOrigin 0 ; continue }

		# Everything before ORIGIN that is not a CDS/RNA block is ignored.
		if {$OnAttendOrigin} { continue }
		# After ORIGIN: keep only the letters of each line.
		set Seq $Ligne
		regsub -all -nocase {[^a-z]} $Seq "" Seq
		append Sequence $Seq
	    }
	    #FaireLire [string length $Sequence]
	    # The lists are saved as Tcl lists; CreateYeastCDSGscopeProject reads them back with foreach.
	    Sauve $LesCDSs dans cds/$Nom 
	    Sauve $LesRNAs dans rna/$Nom 
	    set Entete ">$Nom $Locus $OS DE=$Definition OX=$OX"
	    set Tfa [SequenceFormatTFA $Sequence $Entete "nucbrut"]
	    Sauve $Tfa dans $FicTfa
	    set LesInfos {}
	    set OC [TaxClass $OX Name]
	    regsub {cellular organism; } $OC "" OC
	    lappend LesInfos "Nom: $Nom"
	    lappend LesInfos "Locus: $Locus"
	    lappend LesInfos "Definition: $Definition"
	    lappend LesInfos "OS: $OS"
	    lappend LesInfos "OX: $OX"
	    lappend LesInfos "OC: $OC"
	    SauveLesLignes $LesInfos dans $FicInfos
	    #EspionneL $LesInfos
	}
    }
    #EspionneL $LesCDSs
    # NOTE(review): FicBornes is not used afterwards in this proc.
    set FicBornes "fiches/bornesdespabs"
    CreeBornesDesPABsTroisGradins 1 $N $Prefixe 901 "" "2.2d" ./

    # OS, OC and OX below are those of the last chromosome processed.
    # NOTE(review): they are unset, and the proc errors, if no chromosome was listed.
    set LeMini {}
    lappend LeMini "#MiniConfig" 
    # FichierSequenceADN is written without a value here.
    lappend LeMini "FichierSequenceADN" 
    lappend LeMini "VersionDeGscopeAuBigBang [VersionDeGscope]"
    lappend LeMini "PrefixeDesORFs $Prefixe"
    lappend LeMini "LongueurMiniDesORFs 300"
    lappend LeMini "NotreOS $OS"
    lappend LeMini "NotreOC $OC"
    lappend LeMini "NotreOX $OX"
    lappend LeMini "GenomeOuCollection Collection"
    set Mini [join $LeMini "\n"]
    Sauve $Mini dans "beton/miniconfig"

    OnRevientDuBoulot
    return $Mini
}

proc JoyCreateGenbankFiles {} {
    # Splits the multi-record GenBank file of every Joy species into one file
    # per record, named [JoyDir]/Joy<Gs>Chr/genbankfiles/Joy<Gs>ChrNN where NN
    # is the rank of the record in the file.
    # Returns the list of the files written.
    set Written {}
    foreach Gs [JoyCode ListOf Gs] {
        set File [JoyCode $Gs File]
        set JoyName "Joy$Gs"
        set RgDir "[JoyDir]/${JoyName}Chr"
        # file mkdir also creates the missing parent directories.
        file mkdir "$RgDir/genbankfiles"
        Espionne $File
        set Record {}
        set Chr 0
        set ChrLine ""
        foreach Ligne [LesLignesDuFichier $File] {
            lappend Record $Ligne
            if {[string match "//*" $Ligne]} {
                # End of record: flush it to its own file.
                incr Chr
                Espionne "$Chr $ChrLine"
                set ChrLine ""
                Espionne [llength $Record]
                set FicGb "$RgDir/genbankfiles/${JoyName}[format "%s%2.2d" Chr $Chr]"
                SauveLesLignes $Record dans $FicGb
                lappend Written $FicGb
                set Record {}
            }
            # Remember the last chromosome qualifier line, only for the trace above.
            if {[string first "chromosome=\"" $Ligne] >= 0} {
                set ChrLine $Ligne
            }
        }
    }
    return $Written
}

proc JoyGenome {{Qui ""} {Quoi ""}} {
    # Memoised inventory of the Joy*Chr projects found in [JoyDir].
    # Keys are case-insensitive (both arguments are upper-cased):
    #   JoyGenome ListOf Strain|JoyChr|JoyCDS   lists, reference JoyScChr first when present
    #   JoyGenome <JoyXxChr> ListOfChr          the ChrNN suffixes of its genbankfiles
    #   JoyGenome <JoyXxChr> ListOfChrFile      the corresponding file names
    # Returns "" for an unknown key once the inventory is loaded.
    global JoyGenome

    set Qui  [string toupper $Qui]
    set Quoi [string toupper $Quoi]

    if {[info exists JoyGenome($Qui,$Quoi)]} { return $JoyGenome($Qui,$Quoi) }
    if {[info exists JoyGenome("EstCharge")]} { return "" }
    set JoyGenome("EstCharge") 1

    NousAllonsAuBoulot [JoyDir]

    set LesJoyChr [lsort [glob -nocomplain "Joy*Chr"]]

    # Put the reference first, but only if it really exists: inserting it
    # unconditionally would create a phantom strain.
    set Reference "JoyScChr"
    set IRef [lsearch -exact $LesJoyChr $Reference]
    if {$IRef >= 0} {
        set LesJoyChr [linsert [lreplace $LesJoyChr $IRef $IRef] 0 $Reference]
    }

    foreach JoyChr $LesJoyChr {
        regsub {Chr$} $JoyChr ""    Strain
        regsub {Chr$} $JoyChr "CDS" JoyCDS
        lappend JoyGenome(LISTOF,STRAIN) $Strain
        lappend JoyGenome(LISTOF,JOYCHR) $JoyChr
        lappend JoyGenome(LISTOF,JOYCDS) $JoyCDS
    }

    # Iterate over the local list: the array element does not exist when no
    # project was found.
    foreach JoyChr $LesJoyChr {
        set JOYCHR [string toupper $JoyChr]
        # -nocomplain: a project without genbank files must not abort the
        # loading half-way with the cache already flagged as loaded.
        foreach ChrPath [lsort [glob -nocomplain "$JoyChr/genbankfiles/${JoyChr}*"]] {
            set ChrFile [file tail $ChrPath]
            if { ! [regexp {Chr[0-9][0-9]$} $ChrFile Chr]} { FaireLire "Oups something is wrong with $ChrFile" }
            lappend JoyGenome($JOYCHR,LISTOFCHRFILE) $ChrFile
            lappend JoyGenome($JOYCHR,LISTOFCHR)     $Chr
        }
    }
    OnRevientDuBoulot
    return [JoyGenome $Qui $Quoi]
}

proc JoyCode {{Qui ""} {Quoi ""}} {
    # Memoised cross-reference between the .gbff files of [JoyDir]/DuNCBI and
    # their organism. For each file the first ORGANISM line gives Os (genus and
    # species) and Gs (first letter of each), the first taxon db_xref gives Ox.
    # Keys are case-insensitive (both arguments are upper-cased):
    #   JoyCode ListOf File|Os|Gs|Ox|OxOs
    #   JoyCode <file|Os|Gs|Ox> File|Os|Gs|Ox
    # Returns "" for an unknown key once the table is loaded.
    global JoyCode

    set Qui  [string toupper $Qui]
    set Quoi [string toupper $Quoi]

    if {[info exists JoyCode($Qui,$Quoi)]} { return $JoyCode($Qui,$Quoi) }
    if {[info exists JoyCode("EstCharge")]} { return "" }
    set JoyCode("EstCharge") 1

    set RepJoy [JoyDir]

    foreach Fichier [lsort [glob "$RepJoy/DuNCBI/*.gbff"]] {
        set FICHIER [string toupper $Fichier]
        set Os ""
        set Ox ""
        foreach Ligne [LesLignesDuFichier $Fichier] {
            if { $Os=="" && [regexp {^ *ORGANISM } $Ligne]} {
                # Need the keyword, the genus and the species ; otherwise G or S
                # would be unset or left over from the previous file.
                if {[scan $Ligne "%s %s %s" o G S] < 3} { continue }
                set Os "$G $S"
                set OS [string toupper $Os]
                set Gs "[string index $G 0][string index $S 0]"
                if {[info exists DejaVu($Gs)]} { FaireLire "Duplicate $Gs $Fichier $DejaVu($Gs)" }
                set DejaVu($Gs) $Fichier
                set GS [string toupper $Gs]
                set JoyCode($FICHIER,FILE)   $Fichier
                set JoyCode($FICHIER,OS)     $Os
                set JoyCode($FICHIER,GS)     $Gs
                set JoyCode($OS,FILE)        $Fichier
                set JoyCode($OS,OS)          $Os
                set JoyCode($OS,GS)          $Gs
                set JoyCode($GS,FILE)        $Fichier
                set JoyCode($GS,OS)          $Os
                set JoyCode($GS,GS)          $Gs
                lappend JoyCode(LISTOF,FILE) $Fichier
                lappend JoyCode(LISTOF,OS)   $Os
                lappend JoyCode(LISTOF,GS)   $Gs
                continue
            }
            # The taxid is only recorded once the organism of this file is known,
            # so that Gs, OS and GS cannot come from a previous file.
            if { $Os!="" && $Ox=="" && [regexp {/db_xref="taxon\:([0-9]+)"} $Ligne Match Ox]} {
                set OX $Ox
                lappend JoyCode(LISTOF,OX)   $Ox
                lappend JoyCode(LISTOF,OXOS) "$Ox $Os"
                set JoyCode($OX,FILE)        $Fichier
                set JoyCode($OX,OS)          $Os
                set JoyCode($OX,GS)          $Gs
                set JoyCode($FICHIER,OX)     $Ox
                set JoyCode($OS,OX)          $Ox
                set JoyCode($GS,OX)          $Ox
                set JoyCode($OX,OX)          $Ox
                # Nothing more is needed from this file.
                break
            }
        }
    }
    return [JoyCode $Qui $Quoi]
}


#rR rest concerns YeastGenome

#rR j'ai ecrit ce qu'il faut pour créer un projet Gscope à partir d'un ensemble de fichiers Genbank
#rR Pour les levures par exemple :
#rR Une souche (strain) de levure (par ex YJM1078) est composée de 16 chromosomes + celui de la mito (que je numérote Chr17)
#rR Voir YeastGenome qui gère les 92 strains + S288C (voir CreateYeastGenomesFile)
#rR Je crée deux types de projet par strain
#rR YJM1078Chr  qui contient les 17 chromosomes en tant que boîtes Gscope et aussi les textes des CDS des fichiers Genbank
#rR YJM1078CDS  qui contient tous les CDS de tous les chromosomes mis bout à bout (avec 99 bases entre)


proc tmou {taxid} {
    # Debug helper: prints the taxonomy summary of taxid, then the genome
    # summaries found for it, counts those whose TaxId equals taxid, and exits.
    array set Tx [eSummaryREST taxonomy $taxid]
    parray Tx
    # Read kept as a check that the scientific name is present.
    set Name $Tx($taxid,ScientificName)
    puts ""

    set Lid [split [eSearchREST genome "txid${taxid}\[Organism\]"] " "]
    puts [llength $Lid]
    array set T [eSummaryREST genome $Lid]
    parray T
    puts ""

    set Lgood [list]
    foreach {Key Value} [array get T "*,TaxId"] {
        if {$Value == $taxid} {
            lappend Lgood [lindex [split $Key ,] 0]
        }
    }
    puts "Lgood [llength $Lgood]"
    exit
}

proc reste {TaxId} {
    # Debug helper: searches the genome database for TaxId, prints the ids
    # found and their summaries, then exits the interpreter.
    set Ids [eSearchREST genome "txid$TaxId\[Organism\]"]
    Espionne $Ids
    array set T [eSummaryREST genome $Ids]
    parray T
    exit
}

proc YeastFile {Cds {SubDir ""}} {
    # Path of the file named Cds in sub-directory SubDir of the project that
    # owns it (the name of Cds without its trailing number).
    # Returns "" when that file does not exist.
    regsub {CDS[0-9]+$} $Cds "CDS" Project
    set Path "[GscopeDatabaseDir $Project]/$SubDir/$Cds"
    if {![file exists $Path]} {
        return ""
    }
    return $Path
}

proc AdnDesCopainsAlignes {} {
    # Exploratory display: for each PAB without a tfasdescopainsnuc file, takes
    # one friend out of ten (YeastFriends), pairs each residue of its protein
    # (from position 20) with three bases of its nucleotide sequence (from
    # position 60) and prints the first 221 characters of each resulting line.
    # No file is written yet, so the returned list of created files is empty.
    set RepTfasDesCopains "[RepertoireDuGenome]/tfasdescopainsnuc"
    file mkdir $RepTfasDesCopains
    set LesFichiersCrees {}
    foreach Nom [ListeDesPABs] {
        set FicCopains "$RepTfasDesCopains/$Nom"
        if {[file exists $FicCopains]} { continue }
        set I -1
        set LeTout {}
        foreach Copain [YeastFriends $Nom] {
            incr I
            # Sample one friend out of ten.
            if {$I%10!=0} { continue }
            set FicNuc [YeastFile $Copain nuctfa]
            # YeastFile returns "" when the file is missing: name the friend instead.
            if {$FicNuc==""} { Warne "no nuctfa file for $Copain"; continue }
            set TFA [ContenuDuFichier $FicNuc]

            set FicPro [YeastFile $Copain prottfa]
            if {$FicPro==""} { Warne "no prottfa file for $Copain"; continue }
            set PRO [ContenuDuFichier $FicPro]
            set SeqPro [QueLaSequenceDuTexteTFA $PRO]
            set SeqPro [string range $SeqPro 20 end]

            set Seq [QueLaSequenceDuTexteTFA $TFA]
            set Seq [string range $Seq 60 end]
            set L [string length $Seq]
            set LesCodons {}
            foreach R [split $SeqPro ""] {a b c} [split $Seq ""]  {
                lappend LesCodons "$R-$a$b$c"
            }
            set Codons [format "%-16s %5d %s" $Copain $L [join $LesCodons " "]]
            lappend LeTout [string range $Codons 0 220]
        }
        Espionne
        EspionneL $LeTout
    }
    return $LesFichiersCrees
}

proc YeastTfasDesCopains {} {
    # For each PAB of the current genome that has no tfasdescopains file yet,
    # saves the protein TFA of all its friends (YeastFriends) in
    # tfasdescopains/<Nom>.
    # Returns the list of values returned by SauveLesLignes for the files created.
    set RepTfasDesCopains "[RepertoireDuGenome]/tfasdescopains"
    file mkdir $RepTfasDesCopains
    # The same variable must be filled and returned.
    set LesFichiersCrees {}
    foreach Nom [ListeDesPABs] {
        set FicCopains "$RepTfasDesCopains/$Nom"
        if {[file exists $FicCopains]} { continue }
        set LesTFAs {}
        foreach Copain [YeastFriends $Nom] {
            set FicProt [YeastFile $Copain prottfa]
            # YeastFile returns "" when the file is missing: name the friend instead.
            if {$FicProt==""} { Warne "no prottfa file for $Copain"; continue }
            lappend LesTFAs [ContenuDuFichier $FicProt]
        }
        if {$LesTFAs=={}} { Warne "$Nom no TFA"; continue }
        Espionne $Nom
        lappend LesFichiersCrees [SauveLesLignes $LesTFAs dans $FicCopains]
    }
    return $LesFichiersCrees
}

proc YeastFriends {{Qui ""} {Quoi ""}} {
    # Memoised table of the CDS names that share the same reference
    # (as given by YeastReference).
    #   YeastFriends <Nom>       list of friends of Nom (Quoi defaults to ListOfFriends)
    #   YeastFriends ListOf Ref  sorted unique list of the references
    # Once loaded, a name that has no entry of its own and does not start with
    # S288C is answered through its reference; otherwise "" is returned.
    # The table is read from, or computed and saved to, fiches/YeastFriends.txt
    # of the S288CCDS project.
    global YeastFriends

    # When set, every friend also gets its own copy of the list of its reference.
    set AvecPrecalcul 0

    if {$Quoi==""} { set Quoi "ListOfFriends" }
    if {[info exists YeastFriends($Qui,$Quoi)]}  { return $YeastFriends($Qui,$Quoi) }
    if {[info exists YeastFriends("EstCharge")]} {
	if { ! [regexp "^S288C" $Qui] && [set Ref [YeastReference $Qui]]!=""} { return [YeastFriends $Ref $Quoi] }
	return ""
    }
    set YeastFriends("EstCharge") 1

    set FichierMemo "[GscopeDatabaseDir "S288CCDS"]/fiches/YeastFriends.txt"
    if {[file exists $FichierMemo]} {
	array set YeastFriends [ContenuDuFichier $FichierMemo]
	return [YeastFriends $Qui $Quoi]
    }

    # No memo file yet: scan every CDS of every project.
    # NOTE(review): names for which YeastReference returns "" are all stored
    # under the empty reference - confirm this is intended.
    foreach Scds [lrange [YeastGenome ListOf Scds] 0 end] {
	foreach Nom [YeastListOfCds $Scds] {
	    set Ref [YeastReference $Nom]
	    lappend YeastFriends($Ref,ListOfFriends) $Nom
	    lappend YeastFriends(ListOf,Ref) $Ref
	}
    }
    # NOTE(review): this element does not exist, and the line errors, if no CDS was found above.
    set YeastFriends(ListOf,Ref) [lsort -unique $YeastFriends(ListOf,Ref)]
    if {$AvecPrecalcul} {
	foreach Ref $YeastFriends(ListOf,Ref) {
	    foreach Nom $YeastFriends($Ref,ListOfFriends) {
		if {$Nom==$Ref} { continue }
		set YeastFriends($Nom,ListOfFriends) $YeastFriends($Ref,ListOfFriends)
	    }
	}
    }
    # The whole array, including the loaded flag, is saved as the memo.
    Sauve [array get YeastFriends] dans $FichierMemo
    return [YeastFriends $Qui $Quoi]
}

proc YeastListOfCds Scds {
    # Returns the first word of every non-blank line of fiches/bornesdespabs
    # of the project Scds, i.e. the names of its CDS.
    set FicBornes "[GscopeDatabaseDir $Scds]/fiches/bornesdespabs"
    set LesNoms {}
    foreach Ligne [LesLignesDuFichier $FicBornes] {
        # scan leaves Nom untouched on a blank line: skip it instead of
        # repeating the previous name (or failing on the very first line).
        if {[scan $Ligne "%s" Nom] != 1} { continue }
        lappend LesNoms $Nom
    }
    return $LesNoms
}

proc NomDuLocusTag {{Qui ""}} {
    # Memoised map from locus_tag to Gscope name, built from the infos files
    # of the S288C CDS project.
    #   NomDuLocusTag <locus_tag>  name of the box carrying it, "" if unknown
    #   NomDuLocusTag ListOf       the locus tags
    #   NomDuLocusTag ListOfNom    the corresponding names
    global NomDuLocusTag
    if {[info exists NomDuLocusTag($Qui)]}  { return $NomDuLocusTag($Qui) }
    if {[info exists NomDuLocusTag("EstCharge")]} { return "" }
    set NomDuLocusTag("EstCharge") 1

    set InfosDir "[GscopeDatabaseDir [YeastGenome S288C CDS]]/infos"
    foreach InfoFile [lsort [glob -nocomplain "$InfosDir/*"]] {
        set Nom [file tail $InfoFile]
        set LT [StringApres "locus_tag: " dans [ContenuDuFichier $InfoFile]]
        if {$LT!=""} {
            lappend NomDuLocusTag(ListOf) $LT
            lappend NomDuLocusTag(ListOfNom) $Nom
            if {[info exists NomDuLocusTag($LT)]} { FaireLire "Doublon" }
            set NomDuLocusTag($LT) $Nom
        }
    }

    # Same answer as asking again now that the table is loaded.
    if {[info exists NomDuLocusTag($Qui)]} { return $NomDuLocusTag($Qui) }
    return ""
}

proc YeastReference {{Nom ""}} {
    # Returns the S288C CDS name that Nom corresponds to:
    #   - Nom itself when it already is an S288CCDS name ;
    #   - otherwise the name carrying the locus tag quoted after
    #     "corresponds to s288c" in the infos file of Nom ;
    #   - "" when there is no infos file or no such note.
    if {[string match "S288CCDS*" $Nom]} { return $Nom }
    regsub {[0-9]+$} $Nom "" Project
    set InfoFile "[GscopeDatabaseDir $Project]/infos/$Nom"
    if {[file exists $InfoFile]} {
        set Infos [ContenuDuFichier $InfoFile]
        if {[regexp {corresponds to s288c ([^\n]+)\n} $Infos Match LocusTag]} {
            return [NomDuLocusTag $LocusTag]
        }
    }
    return ""
}

proc YeastGenomesInventory {} {
    # Returns one line "Strain NbCds NbTrna NbRna" per strain, the counts being
    # the number of lines of bornesdespabs, bornesdestrnas and bornesdesarns
    # of its CDS project. Returns an empty list when there is no strain.
    set LesMessages {}
    foreach Strain [YeastGenome ListOf Strain] {
        set Scds [YeastGenome $Strain "CDS"]
        set Rep [GscopeDatabaseDir $Scds]
        set NbCds  [llength [LesLignesDuFichier "$Rep/fiches/bornesdespabs"]]
        set NbTrna [llength [LesLignesDuFichier "$Rep/fiches/bornesdestrnas"]]
        set NbRna  [llength [LesLignesDuFichier "$Rep/fiches/bornesdesarns"]]
        lappend LesMessages "$Strain $NbCds $NbTrna $NbRna"
    }
    return $LesMessages
}

proc CreateYeastGscopeProjectsPourTous {{Ask ""}} {
    # Creates the Chr and CDS Gscope projects of every yeast strain.
    # Ask : pass "Ask" (any case) to be asked, for each strain whose
    #       <Strain>CDS/beton/adn already exists, whether to keep it ;
    #       otherwise existing projects are silently kept.
    # Returns the list of the results of CreateYeastGscopeProjects for the
    # strains actually rebuilt (empty list when nothing was rebuilt).
    set Ask [string equal -nocase $Ask "Ask"]
    # Must exist even when every strain is skipped.
    set LesFaits {}
    foreach Strain [YeastGenome ListOf Strain] {
        set FichierAdn "/genomics/link/YeastGenomes/${Strain}CDS/beton/adn"
        if {[file exists $FichierAdn] && (! $Ask || [OuiOuNon "$FichierAdn already exists. Do I keep it" 0])} { continue }
        lappend LesFaits [CreateYeastGscopeProjects $Strain]
    }
    return $LesFaits
}

proc CreateYeastGscopeProjects {Strain} {
    # Builds the <Strain>Chr project, then the <Strain>CDS project, prints how
    # to open them and returns the name of the CDS project.
    set ChrProject "${Strain}Chr"
    set CdsProject "${Strain}CDS"
    CreateYeastChrGscopeProject $Strain
    CreateYeastCDSGscopeProject $Strain
    Espionne "$ChrProject et $CdsProject ont été créés. Pour les voir :"
    foreach Project [list $ChrProject $CdsProject] {
        Espionne "setgscoperr $Project ; gscope"
    }
    return $CdsProject
}

proc CreateYeastCDSGscopeProject {Strain {Joy ""}} {
    # Builds the <Strain>CDS Gscope project from the cds/ and rna/ files that
    # the <Strain>Chr project holds for each chromosome.
    #   Strain : strain name, the source project being /genomics/link/<Strain>Chr
    #   Joy    : "Joy" to create the project under [JoyDir] ; otherwise it is
    #            created under /gstock/YeastGenomes, like the Chr project of
    #            CreateYeastChrGscopeProject.
    # Every CDS becomes a box <Strain>CDSnnnn with its infos, nuctfa and prottfa
    # files ; every rRNA, tRNA and ncRNA gets its infos and nuctfa files.
    # Also written: fiches/bornesdespabs, bornesdestrnas, bornesdesarns,
    # beton/miniconfig and the concatenated chromosomes.
    # Returns "dernier cree : <name of the last object created>".
    #rR I rewrote it as CreateCDSGscopeProjectFromGenbank to be more general
    set StrainCDS "${Strain}CDS"

    # RG and Link are needed in both cases: they used to be set for Joy only,
    # which made the plain yeast case fail on an unknown variable.
    if {$Joy=="Joy"} {
        set RG "[JoyDir]/$StrainCDS"
    } else {
        set RG "/gstock/YeastGenomes/$StrainCDS"
    }
    if { ! [file exists $RG]} { file mkdir $RG }
    set Link "/genomics/link/$StrainCDS"
    if { ! [file exists $Link]} { exec ln -sf $RG $Link }

    set Prefixe $StrainCDS
    set FormatOrfNumbering "%4.4d"

    NousAllonsAuBoulot $RG
    file mkdir beton
    file mkdir fiches
    file mkdir infos
    file mkdir nuctfa
    file mkdir nucembl
    file mkdir prottfa
    file mkdir protembl
    set RepCDS "/genomics/link/${Strain}Chr/cds"
    set RepRNA "/genomics/link/${Strain}Chr/rna"
    set N 0
    # Defined up front so that the final return works even if nothing is created.
    set Nom ""
    # One dummy base at index 0 (so that positions are 1-based) plus 99 n.
    set SeqTotale [string repeat "n" 100]
    set LesBornesDesPABs {}
    foreach FichierCDS [lsort [glob "$RepCDS/${Strain}Chr*"]] {
        set LesChrBornes {}
        set Chr "Chr00"
        if { ! [regexp {Chr[0-9]+$} $FichierCDS Chr]} { FaireLire "no Chr in $FichierCDS" }
        set ChrDir [file dirname [file dirname $FichierCDS]]
        set ChrNuc [QueLaSequenceDuFichierTFA "$ChrDir/nuctfa/${Strain}$Chr"]
        set ChrLen [string length $ChrNuc]
        # The file holds a Tcl list, one element per CDS, one /key=value per line.
        set LesCDSs [ContenuDuFichier $FichierCDS]
        foreach CDS $LesCDSs {
            set Bornes ""
            set ItsOk 1
            foreach Ligne [split $CDS "\n"] {
                if { ! [regexp {^/([^=]+)=(.*)$} $Ligne Match K V]} { Warne "Wrong $Ligne" ; set ItsOk 0; break }
                regsub -all {\"} $V "" V
                if {$K=="location"} {
                    regsub -all {[<>]} $V "" V
                    set Bornes $V
                }
                lappend Infos($Chr,$Bornes,ListOfK) $K
                set Infos($Chr,$Bornes,$K) $V
            }
            if {$ItsOk} { lappend LesChrBornes "$Chr $Bornes" }
        }

        # Position of this chromosome in the concatenated sequence.
        set Offset [string length $SeqTotale]
        Espionne "$Chr $ChrLen [llength $LesCDSs] CDS Offset $Offset"
        foreach ChrBornes $LesChrBornes {
            lassign [split $ChrBornes " "] Chr Bornes
            if { ! [regexp {([0-9]+)\.\.([0-9]+)} $Bornes Match Deb Fin ]} { Warne "Wrong location $Bornes in $ChrBornes"; continue }
            incr N
            set Nom [format "%s$FormatOrfNumbering" $Prefixe $N]
            set Sens "F"
            if {[regexp "complement" $Bornes]} { set Sens "R" }
            # Keep only the digits, dots and commas: a list of start..end segments.
            regsub -all {[^0-9\,\.]} $Bornes "" Bo
            set LesDF {}
            foreach DF [split $Bo ","] {
                set D ""
                set F ""
                if {[regexp {^([0-9]+)\.\.([0-9]+)$} $DF Match D F]} {}
                if {[regexp {^([0-9]+)$}             $DF D]        } {set F $D}
                if {$D=="" || $F==""} { Espionne "$Bo" ; exit }
                lappend LesDF $D $F
            }
            set NucCds ""
            foreach {D F} $LesDF {
                # The leading n makes the 1-based positions usable as indices.
                if {[catch {append NucCds [string range "n$ChrNuc" $D $F]} Message]} {
                    Espionne $Message
                    Espionne "=$ChrBornes=$D=$F="
                    exit
                }
            }
            if {$Sens=="R"} { set NucCds [NucToReverseAndComplementNuc $NucCds] }
            set Deb [lindex $LesDF 0]
            set Fin [lindex $LesDF end]
            incr Deb $Offset ; incr Deb -1
            incr Fin $Offset ; incr Fin -1
            lappend LesBornesDesPABs "$Nom $Deb $Fin $Sens"
            set LaFicheInfo {}
            lappend LaFicheInfo "Nom: $Nom"
            lappend LaFicheInfo "Chr: $Chr"
            lappend LaFicheInfo "Bornes: $Bornes"
            set Gn ""
            set ProCds ""
            foreach K $Infos($Chr,$Bornes,ListOfK) {
                set V $Infos($Chr,$Bornes,$K)
                if {$K=="translation"} {
                    set ProCds $V
                    continue
                }
                if {$K=="gene"} { set Gn $V }
                if {$K=="product" && $Gn==""} { set Gn $V }
                lappend LaFicheInfo "$K: $V"
            }
            if {$Gn!=""} { lappend LaFicheInfo "ValiGN: $Gn" }
            SauveLesLignes $LaFicheInfo dans "infos/$Nom"
            set Entete ">$Nom $Gn $Bornes"
            set NucTfa [SequenceFormatTFA $NucCds $Entete "nucbrut"]
            set ProTfa [SequenceFormatTFA $ProCds $Entete "protbrut"]
            Sauve $NucTfa dans "nuctfa/$Nom"
            Sauve $ProTfa dans "prottfa/$Nom"
        }
        # Append the chromosome, then pad with n up to a multiple of 3.
        append SeqTotale $ChrNuc
        set LongTotale [string length $SeqTotale]
        set ProchainStart [expr (($LongTotale + 999)/3)*3]
        set Tampon [string repeat "n" [expr $ProchainStart-$LongTotale]]
        append SeqTotale $Tampon
    }

    #rR WARNING: here we process the tRNA, rRNA and ncRNA
    # NOTE(review): SeqTotaleRNA starts with a single n whereas SeqTotale starts
    # with 100, so the RNA bornes are 99 lower than CDS bornes at the same
    # chromosome position - confirm whether this is intended.
    set SeqTotaleRNA "n"
    set LesBornesDesTRNAs {}
    set LesBornesDesARNs {}
    foreach FichierRNA [lsort [glob "$RepRNA/${Strain}Chr*"]] {
        set LesChrBornes {}
        set Chr "Chr00"
        if { ! [regexp {Chr[0-9]+$} $FichierRNA Chr]} { FaireLire "no Chr in $FichierRNA" }
        # Derived from the RNA file itself, not from the last CDS file.
        set ChrDir [file dirname [file dirname $FichierRNA]]
        set ChrNuc [QueLaSequenceDuFichierTFA "$ChrDir/nuctfa/${Strain}$Chr"]
        set LesRNAs [ContenuDuFichier $FichierRNA]
        foreach RNA $LesRNAs {
            set Bornes ""
            set ItsOk 1
            foreach Ligne [split $RNA "\n"] {
                if { ! [regexp {^/([^=]+)=(.*)$} $Ligne Match K V]} { Warne "Wrong $Ligne" ; set ItsOk 0; break }
                Espionne "$K///$V"
                regsub -all {\"} $V "" V
                # The key tRNA_location, rRNA_location, ... carries the type.
                if {[regexp {^([^_]+)_location$} $K Match T]} {
                    set Type $T
                    set K "location"
                }
                if {$K=="location"} {
                    regsub -all {[<>]} $V "" V
                    set Bornes $V
                    set Infos($Chr,$Bornes,Type) $Type
                }
                lappend Infos($Chr,$Bornes,ListOfK) $K
                set Infos($Chr,$Bornes,$K) $V
            }
            if {$ItsOk} { lappend LesChrBornes "$Chr $Bornes" }
        }

        set Offset [string length $SeqTotaleRNA]
        foreach ChrBornes $LesChrBornes {
            lassign [split $ChrBornes " "] Chr Bornes
            if { ! [regexp {([0-9]+)\.\.([0-9]+)} $Bornes Match Deb Fin ]} { Warne "Wrong Bornes $Bornes in $ChrBornes" ; continue }
            set Type $Infos($Chr,$Bornes,Type)
            Espionne "$Chr $Bornes $Type"
            set Sens "F"
            if {[regexp "complement" $Bornes]} { set Sens "R" }
            regsub -all {[^0-9\,\.]} $Bornes "" Bo
            set LesDF {}
            foreach DF [split $Bo ","] {
                set D ""
                set F ""
                if {[regexp {^([0-9]+)\.\.([0-9]+)$} $DF Match D F]} {}
                if {[regexp {^([0-9]+)$}             $DF D]        } {set F $D}
                if {$D=="" || $F==""} { Espionne "$Bo" ; exit }
                lappend LesDF $D $F
            }
            set NucRna ""
            foreach {D F} $LesDF {
                if {[catch {append NucRna [string range "n$ChrNuc" $D $F]} Message]} {
                    Espionne $Message
                    Espionne "=$ChrBornes=$D=$F="
                    exit
                }
            }
            if {$Sens=="R"} { set NucRna [NucToReverseAndComplementNuc $NucRna] }
            set Deb [lindex $LesDF 0]
            set Fin [lindex $LesDF end]
            incr Deb $Offset ; incr Deb -1
            incr Fin $Offset ; incr Fin -1

            set Gn ""
            set AA ""
            set AntiCodon ""
            # The key: value lines are collected here and added to the info file
            # once the name is known ; they used to be appended to the previous
            # info list and then thrown away.
            set LesLignesKV {}
            foreach K $Infos($Chr,$Bornes,ListOfK) {
                set V $Infos($Chr,$Bornes,$K)
                if {$K=="product"} { set Gn [lindex [split $V " "] 0]}
                if {[string equal -nocase $Type "tRNA"]} {
                    if {$K=="note" && [regexp {corresponds to s288c [^\(]+\(([^\)]+)\)} $V Match AntiCodon]} {
                        regsub -all "U" $AntiCodon "T" AntiCodon
                        Espionne $AntiCodon
                    }
                }
                lappend LesLignesKV "$K: $V"
            }
            if {[string equal -nocase $Type "tRNA"]} {
                # tRNA names: tRNA + amino acid + anticodon + rank among same names.
                set AA $Gn
                regsub -nocase {tRNA\-} $AA "" AA
                set Nom "tRNA$AA$AntiCodon"
                if { ! [info exists NiemeDuNom($Nom)]} { set NiemeDuNom($Nom) 0 }
                incr NiemeDuNom($Nom)
                set Nom $Nom-$NiemeDuNom($Nom)
                lappend LesBornesDesTRNAs "$Nom $Deb $Fin $Sens"
                Espionne "$Nom $Deb $Fin $Sens"
            } else {
                set Nom "$Type-$Gn"
                lappend LesBornesDesARNs  "$Nom $Deb $Fin $Sens"
            }
            set LaFicheInfo [list "Nom: $Nom" "Chr: $Chr" "Bornes: $Bornes"]
            foreach LigneKV $LesLignesKV { lappend LaFicheInfo $LigneKV }
            if {$Gn!=""} { lappend LaFicheInfo "ValiGN: $Gn" }
            SauveLesLignes $LaFicheInfo dans "infos/$Nom"
            set Entete ">$Nom $Gn $Bornes"
            set NucTfa [SequenceFormatTFA $NucRna $Entete "nucbrut"]
            Sauve $NucTfa dans "nuctfa/$Nom"
        }
        append SeqTotaleRNA $ChrNuc
        set LongTotale [string length $SeqTotaleRNA]
        set ProchainStart [expr (($LongTotale + 999)/3)*3]
        set Tampon [string repeat "n" [expr $ProchainStart-$LongTotale]]
        append SeqTotaleRNA $Tampon
    }

    set FichierSequenceADN "beton/${Strain}_All_Chromosomes.tfa"
    # The dummy base at index 0 is dropped before saving.
    set TFA [SequenceFormatTFA [string range $SeqTotale 1 end] ">${Strain}_All_Chromosomes with around 99 n between chromosomes"]
    Sauve $TFA dans $FichierSequenceADN
    # Organism data are taken from the miniconfig of the Chr project.
    set MiniChr [ContenuDuFichier "/genomics/link/${Strain}Chr/beton/miniconfig"]
    set OS [StringSuivant "NotreOS " dans $MiniChr]
    set OC [StringSuivant "NotreOC " dans $MiniChr]
    set OX [StringSuivant "NotreOX " dans $MiniChr]
    set LeMini {}
    lappend LeMini "#MiniConfig"
    # A slash is needed between the project link and the relative file name.
    lappend LeMini "FichierSequenceADN $Link/$FichierSequenceADN"
    lappend LeMini "VersionDeGscopeAuBigBang [VersionDeGscope]"
    lappend LeMini "PrefixeDesORFs $Prefixe"
    lappend LeMini "LongueurMiniDesORFs 0"
    lappend LeMini "NotreOS $OS"
    lappend LeMini "NotreOC $OC"
    lappend LeMini "NotreOX $OX"
    lappend LeMini "GenomeOuCollection Genome"
    set Mini [join $LeMini "\n"]
    Sauve $Mini dans "beton/miniconfig"
    SauveLesLignes $LesBornesDesPABs  dans "fiches/bornesdespabs"
    SauveLesLignes $LesBornesDesTRNAs dans "fiches/bornesdestrnas"
    SauveLesLignes $LesBornesDesARNs  dans "fiches/bornesdesarns"
    Espionne "CreeADNetTDNetRAC"
    Espionne [CreeADNetTDNetRAC [string range $SeqTotale 1 end] "" "."]
    OnRevientDuBoulot
    return "dernier cree : $Nom"
}

proc NiceCDSFromCDSLines CDSLines {
    # Turns the raw GenBank lines of one CDS feature into one "/key=value"
    # line per qualifier, joined by newlines.
    # The feature line itself becomes "/location=...". Continuation lines are
    # glued to their qualifier: without separator for location and translation,
    # with one space otherwise. Square brackets are replaced by parentheses.
    set Infos {}
    set Pending {}
    # Sentinel that flushes the last qualifier.
    lappend CDSLines "/LASTLINE="
    foreach Line $CDSLines {
        regsub {^     CDS             } $Line "/location=" Line
        regsub {^                     } $Line ""           Line
        if {[regexp {^/([^=]+)=} $Line]} {
            # A new qualifier starts: emit the one collected so far.
            if {[llength $Pending] > 0} {
                regexp {^/([^=]+)=} [lindex $Pending 0] Match Key
                if {$Key=="location" || $Key=="translation"} {
                    set Glue ""
                } else {
                    set Glue " "
                }
                #rR too dangerous to keep the square brackets
                lappend Infos [string map {[ ( ] )} [join $Pending $Glue]]
                set Pending {}
            }
            if {$Line=="/LASTLINE="} { break }
        }
        lappend Pending $Line
    }
    return [join $Infos "\n"]
}

proc NiceRNAFromRNALines RNALines {
    # Turns the raw GenBank lines of one RNA feature (mRNA, rRNA, tRNA, ncRNA)
    # into one "/key=value" line per qualifier, joined by newlines.
    # The feature line itself becomes "/<type>_location=...". Continuation
    # lines are glued to their qualifier: without separator for the location,
    # with one space otherwise. Square brackets are replaced by parentheses.
    # Returns "" for an empty list of lines.
    set LesInfos {}
    set LesLignesDeLInfoCourante {}
    # Sentinel that flushes the last qualifier.
    lappend RNALines "/LASTLINE="
    foreach Ligne $RNALines {
        # Rewrite the feature line only when it is one: the replacement text
        # needs Type, which does not exist before the first match.
        if {[regexp {^     ((m|r|t|nc)RNA) +} $Ligne Match Type]} {
            regsub {^     ((m|r|t|nc)RNA) +} $Ligne "/${Type}_location=" Ligne
        }
        regsub {^ +} $Ligne "" Ligne
        if {[regexp {^/([^=]+)=} $Ligne]} {
            if {$LesLignesDeLInfoCourante!=""} {
                set JoinCar " "
                regexp {^/([^=]+)=} [lindex $LesLignesDeLInfoCourante 0] Match Key
                if {[regexp "RNA_location" $Key]} { set JoinCar "" }
                set Info [join $LesLignesDeLInfoCourante $JoinCar]
                #rR too dangerous to keep the square brackets
                regsub -all {\[} $Info "(" Info
                regsub -all {\]} $Info ")" Info
                lappend LesInfos $Info
                set LesLignesDeLInfoCourante {}
            }
            if {$Ligne=="/LASTLINE="} { break }
        }
        lappend LesLignesDeLInfoCourante $Ligne
    }
    return [join $LesInfos "\n"]
}

proc CreateYeastChrGscopeProject Strain {
    #rR Creates the Gscope project "<Strain>Chr" holding the chromosomes of one yeast strain.
    #rR   Strain : strain name as known by YeastGenome ; the project name and the ORF prefix are <Strain>Chr.
    #rR The project lives in /gstock/YeastGenomes/<Strain>Chr and is linked from /genomics/link/<Strain>Chr.
    #rR For each chromosome given by YeastGenome ListOf Chr it
    #rR   - reuses genbankfiles/<Strain><Chr> when present, otherwise gets the record with GenbankNucleotide and saves it
    #rR   - saves the CDS features in cds/ and the rRNA, tRNA, ncRNA features in rna/
    #rR     (mRNA features are not collected)
    #rR   - saves the nucleotide sequence in nuctfa/ and a short description in infos/
    #rR Then it calls CreeBornesDesPABsTroisGradins (presumably creates fiches/bornesdespabs - to be confirmed)
    #rR and writes beton/miniconfig.
    #rR Returns the text of the miniconfig.
    #rR Note: NotreOS, NotreOC and NotreOX of the miniconfig are those of the last chromosome processed.

    set StrainChr "${Strain}Chr"

    set RG "/gstock/YeastGenomes/$StrainChr"
    if { ! [file exists $RG]} { file mkdir $RG }
    set Link "/genomics/link/$StrainChr"
    if { ! [file exists $Link]} { exec ln -sf $RG $Link }

    set Prefixe $StrainChr

    NousAllonsAuBoulot $RG
    foreach Dir {beton fiches infos genbankfiles nuctfa nucembl cds rna} {
	file mkdir $Dir
    }
    set N 0
    foreach Chr [YeastGenome ListOf Chr] {
	incr N
	set Nom $Strain$Chr
	set FicGb "genbankfiles/$Nom"
	set Id [YeastGenome $Strain $Chr]
	if {[file exists $FicGb]} {
	    set Gb [ContenuDuFichier $FicGb]
	} else {
	    set Gb [GenbankNucleotide $Id gb "GetText"]
	    Sauve $Gb dans $FicGb
	}
	Espionne "$Nom $FicGb"
	set FicTfa   nuctfa/$Nom
	set FicInfos infos/$Nom

	set Locus      "LocusUnknown"
	set Definition "DefinitionUnknown"
	set OS         "OSUnknown"
	set OX         "OXUnknown"
	set OnAttendOrigin 1
	set Sequence ""
	#rR FeatureEnCours is "" outside of a collected feature, "CDS" or "RNA" inside
	set FeatureEnCours ""
	set LesLignesDeLaFeature {}
	set LesCDSs {}
	set LesRNAs {}
	foreach Ligne [split $Gb "\n"] {
	    if {[regexp "^//" $Ligne]} { break }
	    regexp {^LOCUS +([^ ]+)( |$)}   $Ligne Match Locus
	    regexp {^DEFINITION +(.+)$}     $Ligne Match Definition
	    regexp {/organism=\"([^\"]+)\"} $Ligne Match OS
	    regexp {taxon\:([^\"]+)\"}      $Ligne Match OX

	    if {$FeatureEnCours ne ""} {
		#rR qualifier and continuation lines are deeply indented ;
		#rR a feature key (5 blanks) or a top level keyword (ORIGIN, ...) ends the feature
		if { ! [regexp {^ {0,5}[^ ]} $Ligne]} {
		    lappend LesLignesDeLaFeature $Ligne
		    continue
		}
		if {$FeatureEnCours eq "CDS"} {
		    lappend LesCDSs [NiceCDSFromCDSLines $LesLignesDeLaFeature]
		} else {
		    lappend LesRNAs [NiceRNAFromRNALines $LesLignesDeLaFeature]
		}
		set FeatureEnCours ""
		#rR no continue here: the closing line may itself start a CDS or an RNA
	    }
	    if {[regexp {^     CDS             } $Ligne]} {
		set FeatureEnCours "CDS"
		set LesLignesDeLaFeature [list $Ligne]
		continue
	    }
	    if {[regexp {^     (r|t|nc)RNA   } $Ligne]} {
		set FeatureEnCours "RNA"
		set LesLignesDeLaFeature [list $Ligne]
		continue
	    }
	    if {$OnAttendOrigin && [regexp "^ORIGIN" $Ligne]} { set OnAttendOrigin 0 ; continue }

	    if {$OnAttendOrigin} { continue }
	    #rR after ORIGIN : keep the letters only (drops positions and blanks)
	    regsub -all -nocase {[^a-z]} $Ligne "" Seq
	    append Sequence $Seq
	}
	Sauve $LesCDSs dans cds/$Nom
	Sauve $LesRNAs dans rna/$Nom
	set Entete ">$Nom $Locus $OS DE=$Definition OX=$OX"
	set Tfa [SequenceFormatTFA $Sequence $Entete "nucbrut"]
	Sauve $Tfa dans $FicTfa
	set LesInfos {}
	set OC [TaxClass $OX Name]
	regsub {cellular organism; } $OC "" OC
	lappend LesInfos "Nom: $Nom"
	lappend LesInfos "Locus: $Locus"
	lappend LesInfos "Definition: $Definition"
	lappend LesInfos "OS: $OS"
	lappend LesInfos "OX: $OX"
	lappend LesInfos "OC: $OC"
	SauveLesLignes $LesInfos dans $FicInfos
    }
    CreeBornesDesPABsTroisGradins 1 $N $Prefixe 901 "" "%2.2d" ./

    set LeMini {}
    lappend LeMini "#MiniConfig"
    lappend LeMini "FichierSequenceADN"
    lappend LeMini "VersionDeGscopeAuBigBang [VersionDeGscope]"
    lappend LeMini "PrefixeDesORFs $Prefixe"
    lappend LeMini "LongueurMiniDesORFs 300"
    lappend LeMini "NotreOS $OS"
    lappend LeMini "NotreOC $OC"
    lappend LeMini "NotreOX $OX"
    lappend LeMini "GenomeOuCollection Collection"
    set Mini [join $LeMini "\n"]
    Sauve $Mini dans "beton/miniconfig"

    OnRevientDuBoulot
    return $Mini
}

proc CreateYeastGenomesFile {} {
    #rR (Re)builds the csv file read by YeastGenome.
    #rR The YJM strains (given by Julie) come from YJMstrains.csv located beside the target file.
    #rR The result is : the title line of YJMstrains.csv, then the S288C line, then the YJM lines.
    #rR  ... other strains can be added later.
    #rR Returns the name of the file written.
    set FichierCsv [YeastGenome Filename]
    set LesLignesYJM [LesLignesDuFichier "[file dirname $FichierCsv]/YJMstrains.csv"]

    #rR title first, S288C right after it
    set LesLignes [list [lindex $LesLignesYJM 0] [CsvLineForS288C]]
    LConcat LesLignes [lrange $LesLignesYJM 1 end]

    SauveLesLignes $LesLignes dans $FichierCsv
    return $FichierCsv
}

proc YeastGenome {{Qui ""} {Quoi ""}} {
    #rR Query interface to the yeast strains table (see CreateYeastGenomesFile for the update of the file).
    #rR   YeastGenome Filename          returns the name of the csv file
    #rR   YeastGenome <Strain> Chr      returns "<Strain>Chr" (no file access, Chr is case insensitive)
    #rR   YeastGenome <Strain> CDS      returns "<Strain>CDS" (no file access, CDS is case insensitive)
    #rR   YeastGenome ListOf Strain|Chr|Id|Scds|Schr   returns the corresponding list
    #rR   YeastGenome <Strain> <Chr>    returns the Id stored for this chromosome
    #rR   YeastGenome <Id> Strain|StrainChr   returns the strain or "<Strain>-<Chr>" of this Id
    #rR Returns the empty string for anything unknown.
    #rR The file is loaded once, at the first query which is not answered from memory.
    #rR NOTE(review): YeastGenome <Id> Chr is still answered by the shortcut ("<Id>Chr"),
    #rR   the stored YeastGenome(<Id>,Chr) is not reachable through this proc - to be confirmed with the callers.
    global YeastGenome
    set FichierCsv "[GscopeDatabaseDir YeastGenomes]/DuNCBI/YeastGenomes.csv"
    set YeastGenome(Filename,) $FichierCsv

    #rR the shortcut must not hide the list of chromosomes asked with ListOf Chr
    if {$Qui ne "ListOf"} {
	if {[string equal -nocase $Quoi Chr]} { return "${Qui}Chr" }
	if {[string equal -nocase $Quoi CDS]} { return "${Qui}CDS" }
    }

    if {[info exists YeastGenome($Qui,$Quoi)]} { return $YeastGenome($Qui,$Quoi) }
    if {[info exists YeastGenome("EstCharge")]} { return "" }
    set YeastGenome("EstCharge") 1

    #rR LoadTxl is expected to create YeastGenome(ListOf,Index) and YeastGenome(<Strain>,<Column>), as used below
    LoadTxl $FichierCsv YeastGenome 0 ";"
    set YeastGenome(ListOf,Strain) $YeastGenome(ListOf,Index)

    #rR the mitochondrion is also available as Chr17
    foreach Strain $YeastGenome(ListOf,Strain) {
	set YeastGenome($Strain,Chr17) $YeastGenome($Strain,Mito)
    }

    set YeastGenome(ListOf,Id)   {}
    set YeastGenome(ListOf,Chr)  {}
    set YeastGenome(ListOf,Scds) {}
    set YeastGenome(ListOf,Schr) {}

    #rR the chromosome names are taken from the keys of the first strain
    set Prems [lindex $YeastGenome(ListOf,Strain) 0]
    set Debut [string length "$Prems,"]
    foreach SC [lsort [array names YeastGenome "$Prems,Chr*"]] {
	lappend YeastGenome(ListOf,Chr) [string range $SC $Debut end]
    }
    foreach Strain $YeastGenome(ListOf,Strain) {
	lappend YeastGenome(ListOf,Scds) "${Strain}CDS"
	lappend YeastGenome(ListOf,Schr) "${Strain}Chr"
	foreach Chr $YeastGenome(ListOf,Chr) {
	    set Id $YeastGenome($Strain,$Chr)
	    lappend YeastGenome(ListOf,Id) $Id
	    set YeastGenome($Id,Strain)    $Strain
	    set YeastGenome($Id,Chr)       $Chr
	    set YeastGenome($Id,StrainChr) "$Strain-$Chr"
	}
    }
    #rR now that everything is loaded the same query is answered from memory (or by the empty string)
    return [YeastGenome $Qui $Quoi]
}

proc GetGenbankForS288Cne_sert_plus {} {
    #rR Obsolete, as its name says ("ne sert plus" = no longer used).
    #rR Works in the genbankfiles directory of S288CChr : for each of the 17 entries below
    #rR (16 chromosomes and the mitochondrion) the file S288CChrNN is created with GenbankNucleotide,
    #rR from the accession written after the slash, unless the file already exists.
    #rR Returns the list of these file names.
    NousAllonsAuBoulot "/genomics/link/YeastGenomes/S288CChr/genbankfiles"
    set Chromosomes {
	chromosome I:NC_001133.9/BK006935.2
	chromosome II:NC_001134.8/BK006936.2
	chromosome III:NC_001135.5/BK006937.2
	chromosome IV:NC_001136.10/BK006938.2
	chromosome V:NC_001137.3/BK006939.2
	chromosome VI:NC_001138.5/BK006940.2
	chromosome VII:NC_001139.9/BK006941.2
	chromosome VIII:NC_001140.6/BK006934.2
	chromosome IX:NC_001141.2/BK006942.2
	chromosome X:NC_001142.9/BK006943.2
	chromosome XI:NC_001143.9/BK006944.2
	chromosome XII:NC_001144.5/BK006945.2
	chromosome XIII:NC_001145.3/BK006946.2
	chromosome XIV:NC_001146.8/BK006947.3
	chromosome XV:NC_001147.6/BK006948.2
	chromosome XVI:NC_001148.4/BK006949.2
	mitochondrion MT:NC_001224.1/KP263414.1
    }
    set LesGb {}
    set Rang 0
    #rR the list goes by pairs : a kind (chromosome or mitochondrion) then "name:NC accession/other accession"
    foreach {Kind Accessions} $Chromosomes {
	set Chr [format "Chr%2.2d" [incr Rang]]
	if { ! [regexp {:([^/]+)/(.+)$} $Accessions Match NC BK]} { FaireLire "Wrong $Accessions" }
	set FichierGb "S288C$Chr"
	if { ! [file exists $FichierGb]} {
	    GenbankNucleotide $BK "" $FichierGb
	}
	lappend LesGb $FichierGb
    }
    OnRevientDuBoulot
    return $LesGb
}
    
proc CsvLineForS288C {} {
    #rR Returns the csv line (fields separated by ";") describing the strain S288C :
    #rR the strain name, 17 accessions, then the three fields unknwon, S288C, unknwon.
    #rR The meaning of the columns is given by the title line of the csv file, which is not defined here.
    set Colonnes [list S288C]
    foreach Accession {
	BK006935.2 BK006936.2 BK006937.2 BK006938.2
	BK006939.2 BK006940.2 BK006941.2 BK006934.2
	BK006942.2 BK006943.2 BK006944.2 BK006945.2
	BK006946.2 BK006947.3 BK006948.2 BK006949.2
	KP263414.1
    } {
	lappend Colonnes $Accession
    }
    lappend Colonnes unknwon S288C unknwon
    return [join $Colonnes ";"]
}
    
proc GenbankNucleotide {Id {Quoi ""} {FicOut ""}} {
    #rR Gets a nucleotide record from the NCBI efetch service.
    #rR   Id     : identifier put in the id= field of the url
    #rR   Quoi   : rettype of the request, "gb" when empty
    #rR   FicOut : file to write, "<Id>.gb" when empty ; the special value GetText means no file is written
    #rR Returns the text received if FicOut is GetText, otherwise what Sauve returns.
    #rR The url is read through ContenuDuFichier (presumably able to read urls - to be confirmed).
    if {$Quoi eq ""} {
	set Quoi "gb"
    }
    if {$FicOut eq ""} {
	set FicOut "$Id.gb"
    }
    set Url "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$Id&rettype=$Quoi&retmode=txt"
    Espionne $Url
    set Texte [ContenuDuFichier $Url]
    if {$FicOut ne "GetText"} {
	return [Sauve $Texte dans $FicOut]
    }
    return $Texte
}


Index by: file name | procedure name | procedure call | annotation
File generated 2022-04-05 at 12:55.