Index by: file name | procedure name | procedure call | annotation
gscope_blome.tcl (annotations | original source)

proc BlomeDir {} {
    # Returns the absolute path of the Blome root directory (hard-coded).
    set Racine "/home/ripp/www/blome"
    return $Racine
}

proc BlomeDbDir {} {
    # Returns the directory where the Blome database files are stored:
    # the "cgi-bin" folder located directly under [BlomeDir].
    set Racine [BlomeDir]
    return "$Racine/cgi-bin"
}

proc BlomeCreateDb {{Project ""} {KindOfClades ""} {GetWhat ""}} {
    # Creates the database file [BlomeDbDir]/<Project>_<KindOfClades>.db and
    # fills it with the eight empty Blome tables (plain columns, no keys).
    #
    #   Project       project name; when empty, the tail of [RepertoireDuGenome]
    #   KindOfClades  second part of the database file name
    #   GetWhat       "GetHandleAndFilename" : returns the list {[CanalSql] filename}
    #                 "GetHandle"            : returns [CanalSql]
    #                 anything else          : calls CanalSqlDisconnect, returns the filename
    #
    # If the file already exists the user is asked (OuiOuNon) whether to
    # overwrite it; a refusal returns the string "Error: Already exists".

    if {$Project eq ""} { set Project [file tail [RepertoireDuGenome]] }

    set DbFile "[BlomeDbDir]/${Project}_${KindOfClades}.db"

    if {[FileExists $DbFile]} {
        if { ! [OuiOuNon "$DbFile already exists. Do I overwrite ?"]} {
            return "Error: Already exists"
        }
        # Garde is called on the old file before it is deleted
        # (presumably it keeps a backup copy -- TODO confirm).
        Garde $DbFile
        file delete $DbFile
    }
    Espionne [CanalSql [ConnInfoForDatabase $DbFile "CreateIfNotExists"]]

    SqlExec "BEGIN TRANSACTION"

    # Link tables are dropped first, then the tables they refer to.
    foreach Table {
        ln_clade_organism
        ln_organism_taxobla
        ln_branche_taxobla
        branche
        sapin
        organism
        taxobla
        clade
    } {
        SqlExec "DROP TABLE IF EXISTS $Table"
    }

    # Entity tables.
    SqlExec "CREATE TABLE clade   (pk_clade    INTEGER , nom           VARCHAR(15))"
    SqlExec "CREATE TABLE organism(pk_organism INTEGER , taxid         VARCHAR(10), 
                                                         specie        VARCHAR(25))"
    SqlExec "CREATE TABLE taxobla (pk_taxobla  INTEGER , gscopeid      VARCHAR(10), 
                                                         chemin        VARCHAR(20),
                                                         uniprotaccess VARCHAR(12),
                                                         uniprotid     VARCHAR(15), 
                                                         gene_name     VARCHAR(20), 
                                                         gene_id       VARCHAR(20))"
    SqlExec "CREATE TABLE branche (pk_branche  INTEGER , pk_sapin   INTEGER, 
                                                         id_branche INTEGER, 
                                                         largeur    INTEGER)"

    # Link tables.
    SqlExec "CREATE TABLE ln_clade_organism   (pk_clade     INTEGER,
                                               pk_organism  INTEGER)"

    SqlExec "CREATE TABLE ln_organism_taxobla (pk_taxobla   INTEGER, 
                                               pk_organism  INTEGER)"

    SqlExec "CREATE TABLE ln_branche_taxobla  (pk_branche   INTEGER, 
                                               pk_taxobla   INTEGER)"

    SqlExec "CREATE TABLE sapin               (pk_sapin     INTEGER, 
                                               pk_clade     INTEGER)"


    # Disabled variant of the schema with primary and foreign keys.
    # Never executed; kept for reference only.
    if {0} {
    SqlExec "CREATE TABLE clade   (pk_clade    INTEGER PRIMARY KEY AUTOINCREMENT, nom           VARCHAR(15))"
    SqlExec "CREATE TABLE organism(pk_organism INTEGER PRIMARY KEY AUTOINCREMENT, taxid         VARCHAR(10), 
                                                                                  specie        VARCHAR(25))"
    SqlExec "CREATE TABLE taxobla (pk_taxobla  INTEGER PRIMARY KEY AUTOINCREMENT, gscopeid      VARCHAR(10), 
                                                                                  chemin        VARCHAR(20),
                                                                                  uniprotaccess VARCHAR(12),
                                                                                  uniprotid     VARCHAR(15), 
                                                                                  gene_name     VARCHAR(20), 
                                                                                  gene_id       VARCHAR(20))"
    SqlExec "CREATE TABLE branche (pk_branche  INTEGER PRIMARY KEY AUTOINCREMENT, pk_sapin   INTEGER, 
                                                                                  id_branche INTEGER, 
                                                                                  largeur    INTEGER, 
                                   FOREIGN KEY(pk_sapin) REFERENCES sapin(pk_sapin))"

    SqlExec "CREATE TABLE ln_clade_organism   (pk_clade     INTEGER,
                                               pk_organism  INTEGER, 
                                   PRIMARY KEY(pk_clade, pk_organism), 
                                   FOREIGN KEY(pk_clade)    REFERENCES clade(pk_clade), 
                                   FOREIGN KEY(pk_organism) REFERENCES organism(pk_organism))"
    SqlExec "CREATE TABLE ln_organism_taxobla (pk_taxobla   INTEGER, 
                                               pk_organism  INTEGER, 
                                   PRIMARY KEY(pk_taxobla, pk_organism), 
                                   FOREIGN KEY(pk_organism) REFERENCES organism(pk_organism), 
                                   FOREIGN KEY(pk_taxobla)  REFERENCES taxobla(pk_taxobla))"
    SqlExec "CREATE TABLE ln_branche_taxobla  (pk_branche   INTEGER, 
                                               pk_taxobla   INTEGER, 
                                   PRIMARY KEY(pk_branche, pk_taxobla), 
                                   FOREIGN KEY(pk_branche)  REFERENCES branche(pk_branche),
                                   FOREIGN KEY(pk_taxobla)  REFERENCES taxobla(pk_taxobla))"
    SqlExec "CREATE TABLE sapin               (pk_sapin     INTEGER PRIMARY KEY AUTOINCREMENT, 
                                               pk_clade     INTEGER, 
                                   FOREIGN KEY(pk_clade)    REFERENCES clade(pk_clade)) "
    }
    SqlExec "COMMIT"

    # The two "Get..." modes hand the still-open connection to the caller.
    switch -exact -- $GetWhat {
        "GetHandleAndFilename" { return [list [CanalSql] $DbFile] }
        "GetHandle"            { return [CanalSql] }
    }
    CanalSqlDisconnect
    return $DbFile
}

proc BlomeNewQuery {{Project ""} {KindOfClades ""}} {
    # Builds a complete Blome database for the current project.
    #
    #   Project       passed to BlomeCreateDb (empty = current project)
    #   KindOfClades  kind of clades, passed to BlomeClades,
    #                 CladeContentWithinOi2017 and BlomeCreateDb
    #
    # Steps:
    #   1. create the empty database with BlomeCreateDb;
    #   2. write one tab-separated text file per table into
    #      [RepertoireDuGenome]/blomesql (files 10_... to 80_...);
    #   3. load every file into the table named after the "-" in its file name;
    #   4. create the indexes (BlomeCreateIndex) and disconnect.
    #
    # Returns the database file name, or the full error message returned by
    # BlomeCreateDb (a string starting with "Error") when creation was refused.

    set Overwrite 1

    set RepSql "[RepertoireDuGenome]/blomesql"
    file mkdir $RepSql

    # Test for an error BEFORE unpacking: the error text contains spaces, so
    # lassign would keep only its first word ("Error:") and lose the reason.
    set Creation [BlomeCreateDb $Project $KindOfClades "GetHandleAndFilename"]
    if {[regexp "^Error" $Creation]} { return $Creation }
    lassign $Creation SqlHandle Bdd

    set LesFichiersACharger {}

    if {$Overwrite} {
        # The value is not used below; the call is kept in case NotreOX has
        # side effects -- TODO confirm and remove if it has none.
        set TaxIdQuery [NotreOX]

        set ListOfClades [lsort -unique [BlomeClades $KindOfClades]]

        # Organisms of each clade, and for each organism its name and clades.
        foreach Clade $ListOfClades {
            Espionne "Clade $Clade"
            set OrgsFromClade($Clade) [CladeContentWithinOi2017 $Clade $KindOfClades]
            set Cardinal($Clade) [llength $OrgsFromClade($Clade)]
            foreach TaxId $OrgsFromClade($Clade) {
                set OsFrom($TaxId) [Tax $TaxId "Name"]
                lappend LesCladesDuTaxId($TaxId) $Clade
            }
        }

        # Table clade: pk_clade, nom
        set PkClade 0
        set SqlPourClade {}
        foreach Clade $ListOfClades {
            incr PkClade
            lappend SqlPourClade "$PkClade\t$Clade"
            set PkDuClade($Clade) $PkClade
        }
        lappend LesFichiersACharger [SauveLesLignes $SqlPourClade dans "$RepSql/10_SqlPour-clade.txt"]

        # Table organism: pk_organism, taxid, specie
        set PkOrganism 0
        set SqlPourOrganism {}
        foreach TaxId [lsort -integer [array names OsFrom]] {
            set Species $OsFrom($TaxId)
            incr PkOrganism
            lappend SqlPourOrganism "$PkOrganism\t$TaxId\t$Species"
            set PkDuOrganismTaxId($TaxId)     $PkOrganism
            set PkDuOrganismSpecies($Species) $PkOrganism
        }
        lappend LesFichiersACharger [SauveLesLignes $SqlPourOrganism dans "$RepSql/20_SqlPour-organism.txt"]

        # Table ln_clade_organism: pk_clade, pk_organism
        set SqlPourLnCladeOrganism {}
        foreach Clade $ListOfClades {
            foreach TaxId $OrgsFromClade($Clade) {
                set PkOrganism $PkDuOrganismTaxId($TaxId)
                lappend SqlPourLnCladeOrganism $PkDuClade($Clade)\t$PkOrganism
            }
        }
        lappend LesFichiersACharger [SauveLesLignes $SqlPourLnCladeOrganism dans "$RepSql/30_SqlPour-ln_clade_organism.txt"]

        # Table sapin: pk_sapin, pk_clade. One sapin per clade, with the same
        # key as the clade (original author's note: not sure there can be others).
        # The ln_branche_taxobla step below relies on pk_sapin == pk_clade.
        set SqlPourSapin {}
        set LesPkSapin {}
        foreach Clade $ListOfClades {
            set PkSapin $PkDuClade($Clade)
            lappend SqlPourSapin "$PkSapin\t$PkDuClade($Clade)"
            lappend LesPkSapin $PkSapin
            lappend LesCladesDuSapin($PkSapin) $Clade
        }
        lappend LesFichiersACharger [SauveLesLignes $SqlPourSapin dans "$RepSql/40_SqlPour-sapin.txt"]

        # Table branche: pk_branche, pk_sapin, id_branche, largeur.
        # id_branche takes the values given by [NombresEntre 0 Largeur], where
        # Largeur is the number of organisms of the clade.
        set PkBranche 0
        set SqlPourBranche {}
        foreach PkSapin $LesPkSapin {
            foreach Clade $LesCladesDuSapin($PkSapin) {
                set Largeur [llength $OrgsFromClade($Clade)]
                foreach IdBranche [NombresEntre 0 $Largeur] {
                    incr PkBranche
                    lappend SqlPourBranche $PkBranche\t$PkSapin\t$IdBranche\t$Largeur
                    set PkBrancheDe($PkSapin,$IdBranche) $PkBranche
                }
            }
        }
        lappend LesFichiersACharger [SauveLesLignes $SqlPourBranche dans "$RepSql/50_SqlPour-branche.txt"]

        # Table taxobla: pk_taxobla, gscopeid, chemin, uniprotaccess,
        # uniprotid, gene_name, gene_id
        set PkTaxobla 0
        set SqlPourTaxobla {}
        foreach Nom [ListeDesPABs] {
            incr PkTaxobla
            set PkTaxoblaDe($Nom) $PkTaxobla
            set Ac [ExtraitInfo $Nom "AC:"]
            set Id [ExtraitInfo $Nom "ID:"]
            set Gn [ExtraitInfo $Nom "ValiGN:"] ; if {$Gn==""} { set Gn [ExtraitInfo $Nom "GN:"] }
            set Gi [ExtraitInfo $Nom "GI:"]     ; if {$Gi==""} { set Gi [NIAG $Nom "G"] }
            lappend SqlPourTaxobla "$PkTaxobla\t$Nom\ttaxobla\t$Ac\t$Id\t$Gn\t$Gi"
        }
        lappend LesFichiersACharger [SauveLesLignes $SqlPourTaxobla dans "$RepSql/60_SqlPour-taxobla.txt"]


        # Table ln_organism_taxobla: pk_taxobla, pk_organism.
        # Also counts, per (taxobla, clade), the hits falling into the clade.
        # NOTE(review): every hit line is counted, so an organism appearing
        # several times in one taxobla file is counted several times -- confirm
        # whether distinct organisms were intended.
        set SqlPourLnOrganismTaxobla {}
        foreach Nom [ListeDesPABs] {
            set PkTaxobla $PkTaxoblaDe($Nom)
            foreach Clade $ListOfClades {
                set PkClade $PkDuClade($Clade)
                set NbHitsDansClade($PkTaxobla,$PkClade) 0
            }
            foreach Ligne [LesLignesDuFichier [GscopeFile $Nom "taxobla"]] {
                # Skip blank or malformed lines: without this check the values
                # of the previous line would silently be used again.
                if {[scan $Ligne "%f %f %d" Pn Score TaxId] != 3} { continue }
                # Stop at the first hit above the threshold (this assumes the
                # file is sorted by increasing Pn -- TODO confirm).
                if {$Pn>0.001} { break }
                if { ! [info exists PkDuOrganismTaxId($TaxId)]} { continue }
                set PkOrganism $PkDuOrganismTaxId($TaxId)
                lappend SqlPourLnOrganismTaxobla "$PkTaxobla\t$PkOrganism"
                foreach Clade $LesCladesDuTaxId($TaxId) {
                    set PkClade $PkDuClade($Clade)
                    incr NbHitsDansClade($PkTaxobla,$PkClade)
                }
            }
        }
        lappend LesFichiersACharger [SauveLesLignes $SqlPourLnOrganismTaxobla dans "$RepSql/70_SqlPour-ln_organism_taxobla.txt"]

        # Table ln_branche_taxobla: pk_branche, pk_taxobla.
        # The number of hits is used as id_branche; the clade key found in the
        # array index is used as pk_sapin (they are equal, see the sapin step).
        set SqlPourLnBrancheTaxobla {}
        foreach {K NbHits} [array get NbHitsDansClade] {
            lassign [split $K ","] PkTaxobla PkSapin
            if { ! [info exists PkBrancheDe($PkSapin,$NbHits)]} {
                Espionne "not existing PkBrancheDe($PkSapin,$NbHits) PkTaxobla $PkTaxobla"
                continue
            }
            set PkBranche $PkBrancheDe($PkSapin,$NbHits)
            lappend SqlPourLnBrancheTaxobla "$PkBranche\t$PkTaxobla"
        }
        lappend LesFichiersACharger [SauveLesLignes $SqlPourLnBrancheTaxobla dans "$RepSql/80_SqlPour-ln_branche_taxobla.txt"]
    } else {
        # Reuse the files written by a previous run.
        set LesFichiersACharger [lsort [glob "$RepSql/*SqlPour-*"]]
    }

    # Load each file into the table whose name stands between "-" and the
    # first "." of the file name, using tab as column separator.
    foreach Fichier $LesFichiersACharger {
        set Queue [file tail $Fichier]
        if { ! [regexp {\-([^\.]+)\.} $Queue Match Table]} { continue }
        $SqlHandle copy abort $Table $Fichier "\t"
        Espionne "$Table done"
    }

    BlomeCreateIndex

    CanalSqlDisconnect
    return $Bdd
}

proc BlomeCreateIndex {{Bdd ""}} {
    # Creates one index per Blome table.
    #
    #   Bdd  database to work on. When given, the procedure connects to it,
    #        creates the indexes, disconnects and returns "".
    #        When empty, the current connection is used, left open, and
    #        [CanalSql] is returned.
    set Autonome [expr {$Bdd ne ""}]

    if {$Autonome} { CanalSql [ConnInfoForDatabase $Bdd] }

    foreach Ordre {
        "CREATE INDEX iclade    ON clade     (pk_clade, nom)"
        "CREATE INDEX iorganism ON organism  (pk_organism, taxid, specie)"
        "CREATE INDEX itaxobla  ON taxobla   (pk_taxobla, gscopeid)"
        "CREATE INDEX ibranche  ON branche   (pk_branche)"
        "CREATE INDEX isapin    ON sapin     (pk_sapin, pk_clade)"
        "CREATE INDEX ilnco     ON ln_clade_organism   (pk_clade, pk_organism)"
        "CREATE INDEX ilnot     ON ln_organism_taxobla (pk_taxobla, pk_organism)"
        "CREATE INDEX ilbt      ON ln_branche_taxobla  (pk_branche, pk_taxobla)"
    } {
        SqlExec $Ordre
    }

    if {$Autonome} {
        CanalSqlDisconnect
        return ""
    }
    return [CanalSql]
}

proc BlomeClades {{Quoi ""} {GetWhat ""}} {
    # Returns the clades belonging to the kind of clades Quoi.
    #
    #   Quoi     "Archaea", "CMC", "TTLL" or "Cilio"; any text containing
    #            "Possible" (case-insensitive) yields the list of known kinds.
    #            When [OnTraite "CilioCarta" "Like"] is true the Cilio clades
    #            are returned whatever Quoi is.
    #   GetWhat  "GetList" : the result is split on single spaces first.
    #
    # Original author's notes: called by the Python "Blame" through
    # QuestionDeScience; the list of possible kinds must be kept up to date.

    set Possibles [lsort [list Archaea CMC Cilio TTLL]]

    if {[regexp -nocase "Possible" $Quoi]} {
        set Clades $Possibles
    } else {
        set Clades {}
    }

    switch -exact -- $Quoi {
        "Archaea" { set Clades [PhylAr ListOf PhyOS] }
        "CMC"     { set Clades [CMC ListOf JoinedClade] }
        "TTLL"    { set Clades [TTLL ListOf JoinedClade] }
    }

    # Evaluated last, so it overrides anything chosen above.
    if {$Quoi=="Cilio" || [OnTraite "CilioCarta" "Like"]} {
        if { ! [OnTraite "CilioCarta"]} {
            # Not inside CilioCarta itself: ask that project for the answer.
            set Clades [QuestionDeScience "CilioCarta" "ret BilanCilio ListOf JoinedClade"]
        } else {
            set Clades [BilanCilio ListOf JoinedClade]
        }
    }

    if {$GetWhat ne "GetList"} { return $Clades }
    return [split $Clades " "]
}

proc CanalSqlTaxobla {{Bdd ""}} {
    # Makes sure the current SQL connection points to the database Bdd and
    # returns [CanalSql].
    #
    #   Bdd  database file; when it is not empty and [FileAbsent $Bdd] is true
    #        it is looked for in [BlomeDbDir].
    #
    # The connection is replaced only when "<current dbname>.db" differs from
    # the tail of Bdd.
    # NOTE(review): with an empty Bdd the comparison always differs, so the
    # current connection is closed and reopened with an empty name -- confirm
    # that ConnInfoForDatabase handles that case.

    if {$Bdd ne "" && [FileAbsent $Bdd]} {
        set Bdd "[BlomeDbDir]/$Bdd"
    }

    Espionne $Bdd

    set Attendu [file tail $Bdd]

    # Already connected to the right database: nothing to do.
    if { "[CanalSql "GetDbname"].db" == $Attendu } { return [CanalSql] }

    Espionne [CanalSql "GetDbname"]
    Espionne $Attendu
    CanalSqlDisconnect
    CanalSql [ConnInfoForDatabase $Bdd]
    return [CanalSql]
}

proc BlomeFilterTaxobla {{Bdd ""} {ListOfPkOrg ""} {ListOfNot ""} {ListOfCladeCounts ""}} {
    # Selects the taxobla entries matching presence/absence of organisms and
    # hit counts per clade.
    #
    #   Bdd                database, passed to CanalSqlTaxobla
    #   ListOfPkOrg        pk_organism values that must have a hit (any
    #                      non-digit run is a separator; "-" or empty = all)
    #   ListOfNot          pk_organism values that must NOT have a hit
    #                      ("-" or empty = none). The special value
    #                      "GetListOfPk" makes the procedure return the plain
    #                      list of pk_taxobla found so far.
    #   ListOfCladeCounts  comma-separated specs "<pk_clade>/<n>" or
    #                      "<pk_clade>/<min>-<max>" restricting the number of
    #                      hits in a clade. The special value "UseUnion"
    #                      combines the ListOfPkOrg results by union instead
    #                      of intersection.
    #
    # Returns "Pks/Noms/GeneIds/NbHitsDesClades", each part being a
    # space-separated list (pk_taxobla, gscopeid, non-empty gene_id, and
    # "<pk_clade>=<min>-<max>" for every clade count applied).
    #
    # Note: this procedure calls itself to resolve ListOfNot.
    CanalSqlTaxobla $Bdd

    set UseUnion [string equal -nocase $ListOfCladeCounts "UseUnion"]

    LogWscope $ListOfPkOrg
    LogWscope $ListOfNot

    if {$ListOfPkOrg=="-"} { set ListOfPkOrg "" }
    if {$ListOfNot  =="-"} { set ListOfNot   "" }

    # Keep only the digits: this normalises the separators and guarantees
    # that nothing but integers reaches the SQL built below.
    if {[regexp {[^0-9]} $ListOfPkOrg]} {
        regsub -all {[^0-9]+} $ListOfPkOrg "," ListOfPkOrg
        set ListOfPkOrg [split [string trim $ListOfPkOrg ","] ","]
    }

    if {$ListOfPkOrg==""} {
        set LesPkTot [SqlExec "select pk_taxobla from taxobla" "GetList"]
    } else {
        set PremiereFois 1
        foreach PkO $ListOfPkOrg {
            set Sql "select pk_taxobla from ln_organism_taxobla where pk_organism==$PkO"
            set LesPkT [SqlExec $Sql "GetList"]
            if {$PremiereFois} {
                set LesPkTot $LesPkT
                set PremiereFois 0
            } else {
                if {$UseUnion} {
                    set LesPkTot [ListsUnion $LesPkTot $LesPkT]
                } else {
                    set LesPkTot [ListsIntersection $LesPkTot $LesPkT]
                }
            }
        }
    }

    if {$ListOfNot=="GetListOfPk"} { return $LesPkTot }

    # Remove every entry having a hit in at least one of the "Not" organisms.
    if {$ListOfNot!=""} {
        set LesNot [BlomeFilterTaxobla $Bdd $ListOfNot "GetListOfPk" "UseUnion"]
        set LesPkTot [ListsComplement $LesPkTot $LesNot]
    }

    #####################if {$ListOfCladeCounts==""} { return $LesPkTot }

    # Original author's note: we assume no "Absence" is checked when a number
    # of species is requested, so we can filter on the requested count.
    set LesNbHitsDesClades {}
    foreach CC [split $ListOfCladeCounts ","] {
        LogWscope $CC

        # Validate the spec before using it. A spec that does not parse (for
        # instance the "UseUnion" keyword or an empty item) used to leave
        # PkClade/CountText unset or holding the previous item's values, and
        # the bounds were inserted into the SQL text unchecked.
        set Valide [expr {[scan $CC "%d/%s" PkClade CountText] == 2}]
        if {$Valide} {
            set LesBornes [split $CountText "-"]
            set D [lindex $LesBornes 0]
            set F [lindex $LesBornes end]
            set Valide [expr {[regexp {^[0-9]+$} $D] && [regexp {^[0-9]+$} $F]}]
        }
        if { ! $Valide} {
            if {$CC ne ""} { LogWscope "BlomeFilterTaxobla: clade count '$CC' ignored" }
            continue
        }

        # The clade table is not needed here: listing it without a join
        # condition only repeated every row once per clade.
        set LeSql {}
        lappend LeSql "select ln_branche_taxobla.pk_taxobla from ln_branche_taxobla, branche, sapin "
        lappend LeSql " where "
        lappend LeSql "              $PkClade     =              sapin.pk_clade "
        lappend LeSql "   and   sapin.pk_sapin    =            branche.pk_sapin "
        lappend LeSql "   and branche.pk_branche  = ln_branche_taxobla.pk_branche "
        lappend LeSql "   and branche.id_branche >= $D and branche.id_branche <= $F"
        set Sql [join $LeSql " "]
        regsub -all { +} $Sql " " Sql
        Espionne $Sql
        set LesBonsNb [SqlExec $Sql "GetList"]
        lappend LesNbHitsDesClades "$PkClade=$D-$F"
        set LesPkTot [ListsIntersection $LesPkTot $LesBonsNb]
    }

    # Fetch the identifiers of the selected entries.
    set LesPk {}
    set LesGscopeId {}
    set LesGeneId {}
    set Sql "select pk_taxobla, gscopeid, gene_id from taxobla where pk_taxobla in ([join $LesPkTot ,])"
    LogWscope $Sql
    foreach {Pk GscopeId GeneId} [SqlExec $Sql "GetList"] {
        lappend LesPk       $Pk
        lappend LesGscopeId $GscopeId
        if {$GeneId!=""} { lappend LesGeneId $GeneId }
    }

    set Pks             [join $LesPk " "]
    set Noms            [join $LesGscopeId " "]
    set GeneIds         [join $LesGeneId " "]
    set NbHitsDesClades [join $LesNbHitsDesClades " "]
    set Resultat "$Pks/$Noms/$GeneIds/$NbHitsDesClades"
    return $Resultat
}

Index by: file name | procedure name | procedure call | annotation
File generated 2022-04-05 at 12:55.